Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_format.h"
9 : #include "xfs_log_format.h"
10 : #include "xfs_shared.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_bit.h"
13 : #include "xfs_mount.h"
14 : #include "xfs_defer.h"
15 : #include "xfs_btree.h"
16 : #include "xfs_rmap.h"
17 : #include "xfs_alloc_btree.h"
18 : #include "xfs_alloc.h"
19 : #include "xfs_extent_busy.h"
20 : #include "xfs_errortag.h"
21 : #include "xfs_error.h"
22 : #include "xfs_trace.h"
23 : #include "xfs_trans.h"
24 : #include "xfs_buf_item.h"
25 : #include "xfs_log.h"
26 : #include "xfs_ag.h"
27 : #include "xfs_ag_resv.h"
28 : #include "xfs_bmap.h"
29 : #include "xfs_health.h"
30 :
/* Slab cache backing struct xfs_extfree_item allocations. */
struct kmem_cache	*xfs_extfree_item_cache;

/* Workqueue used by the allocation code; callers not visible in this file chunk. */
struct workqueue_struct *xfs_alloc_wq;

/* Absolute difference of two block numbers; safe for unsigned operands. */
#define XFS_ABSDIFF(a,b)	(((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))

/*
 * Flags for xfs_alloc_fixup_trees: set when the corresponding cursor is
 * already positioned at the record being fixed up, so no lookup is needed.
 */
#define	XFSA_FIXUP_BNO_OK	1
#define	XFSA_FIXUP_CNT_OK	2
39 :
40 : /*
41 : * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
42 : * the beginning of the block for a proper header with the location information
43 : * and CRC.
44 : */
45 : unsigned int
46 7705953 : xfs_agfl_size(
47 : struct xfs_mount *mp)
48 : {
49 852444707 : unsigned int size = mp->m_sb.sb_sectsize;
50 :
51 10967103 : if (xfs_has_crc(mp))
52 850244287 : size -= sizeof(struct xfs_agfl);
53 :
54 852444707 : return size / sizeof(xfs_agblock_t);
55 : }
56 :
/*
 * Return the AG block reserved for the refcount btree: one past the last
 * of the rmap/finobt/inobt fixed-location blocks present on this fs.
 */
unsigned int
xfs_refc_block(
	struct xfs_mount	*mp)
{
	unsigned int		prev;

	if (xfs_has_rmapbt(mp))
		prev = XFS_RMAP_BLOCK(mp);
	else if (xfs_has_finobt(mp))
		prev = XFS_FIBT_BLOCK(mp);
	else
		prev = XFS_IBT_BLOCK(mp);

	return prev + 1;
}
67 :
68 : xfs_extlen_t
69 35484 : xfs_prealloc_blocks(
70 : struct xfs_mount *mp)
71 : {
72 35484 : if (xfs_has_reflink(mp))
73 35224 : return xfs_refc_block(mp) + 1;
74 260 : if (xfs_has_rmapbt(mp))
75 5 : return XFS_RMAP_BLOCK(mp) + 1;
76 255 : if (xfs_has_finobt(mp))
77 207 : return XFS_FIBT_BLOCK(mp) + 1;
78 48 : return XFS_IBT_BLOCK(mp) + 1;
79 : }
80 :
81 : /*
82 : * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
83 : * guarantee that we can refill the AGFL prior to allocating space in a nearly
84 : * full AG. Although the space described by the free space btrees, the
85 : * blocks used by the freesp btrees themselves, and the blocks owned by the
86 : * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
87 : * free space in the AG drop so low that the free space btrees cannot refill an
88 : * empty AGFL up to the minimum level. Rather than grind through empty AGs
89 : * until the fs goes down, we subtract this many AG blocks from the incore
90 : * fdblocks to ensure user allocation does not overcommit the space the
91 : * filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
92 : * withhold space from xfs_mod_fdblocks, so we do not account for that here.
93 : */
#define XFS_ALLOCBT_AGFL_RESERVE	4	/* AG blocks withheld per AG; see above */
95 :
96 : /*
97 : * Compute the number of blocks that we set aside to guarantee the ability to
98 : * refill the AGFL and handle a full bmap btree split.
99 : *
100 : * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
101 : * AGF buffer (PV 947395), we place constraints on the relationship among
102 : * actual allocations for data blocks, freelist blocks, and potential file data
103 : * bmap btree blocks. However, these restrictions may result in no actual space
104 : * allocated for a delayed extent, for example, a data block in a certain AG is
105 : * allocated but there is no additional block for the additional bmap btree
106 : * block due to a split of the bmap btree of the file. The result of this may
107 : * lead to an infinite loop when the file gets flushed to disk and all delayed
108 : * extents need to be actually allocated. To get around this, we explicitly set
109 : * aside a few blocks which will not be reserved in delayed allocation.
110 : *
111 : * For each AG, we need to reserve enough blocks to replenish a totally empty
112 : * AGFL and 4 more to handle a potential split of the file's bmap btree.
113 : */
114 : unsigned int
115 35534 : xfs_alloc_set_aside(
116 : struct xfs_mount *mp)
117 : {
118 35534 : return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
119 : }
120 :
121 : /*
122 : * When deciding how much space to allocate out of an AG, we limit the
123 : * allocation maximum size to the size of the AG. However, we cannot use all the
124 : * blocks in the AG - some are permanently used by metadata. These
125 : * blocks are generally:
126 : * - the AG superblock, AGF, AGI and AGFL
127 : * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
128 : * the AGI free inode and rmap btree root blocks.
129 : * - blocks on the AGFL according to xfs_alloc_set_aside() limits
130 : * - the rmapbt root block
131 : *
132 : * The AG headers are sector sized, so the amount of space they take up is
133 : * dependent on filesystem geometry. The others are all single blocks.
134 : */
135 : unsigned int
136 24125 : xfs_alloc_ag_max_usable(
137 : struct xfs_mount *mp)
138 : {
139 24125 : unsigned int blocks;
140 :
141 24125 : blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
142 24125 : blocks += XFS_ALLOCBT_AGFL_RESERVE;
143 24125 : blocks += 3; /* AGF, AGI btree root blocks */
144 24125 : if (xfs_has_finobt(mp))
145 24075 : blocks++; /* finobt root block */
146 24125 : if (xfs_has_rmapbt(mp))
147 23872 : blocks++; /* rmap root block */
148 24125 : if (xfs_has_reflink(mp))
149 23867 : blocks++; /* refcount root block */
150 :
151 24125 : return mp->m_sb.sb_agblocks - blocks;
152 : }
153 :
154 : /*
155 : * Lookup the record equal to [bno, len] in the btree given by cur.
156 : */
157 : STATIC int /* error */
158 245026396 : xfs_alloc_lookup_eq(
159 : struct xfs_btree_cur *cur, /* btree cursor */
160 : xfs_agblock_t bno, /* starting block of extent */
161 : xfs_extlen_t len, /* length of extent */
162 : int *stat) /* success/failure */
163 : {
164 245026396 : int error;
165 :
166 245026396 : cur->bc_rec.a.ar_startblock = bno;
167 245026396 : cur->bc_rec.a.ar_blockcount = len;
168 245026396 : error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
169 245047076 : cur->bc_ag.abt.active = (*stat == 1);
170 245047076 : return error;
171 : }
172 :
173 : /*
174 : * Lookup the first record greater than or equal to [bno, len]
175 : * in the btree given by cur.
176 : */
177 : int /* error */
178 7634 : xfs_alloc_lookup_ge(
179 : struct xfs_btree_cur *cur, /* btree cursor */
180 : xfs_agblock_t bno, /* starting block of extent */
181 : xfs_extlen_t len, /* length of extent */
182 : int *stat) /* success/failure */
183 : {
184 349963065 : int error;
185 :
186 349963065 : cur->bc_rec.a.ar_startblock = bno;
187 349963065 : cur->bc_rec.a.ar_blockcount = len;
188 7634 : error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
189 349965677 : cur->bc_ag.abt.active = (*stat == 1);
190 349965677 : return error;
191 : }
192 :
193 : /*
194 : * Lookup the first record less than or equal to [bno, len]
195 : * in the btree given by cur.
196 : */
197 : int /* error */
198 383564800 : xfs_alloc_lookup_le(
199 : struct xfs_btree_cur *cur, /* btree cursor */
200 : xfs_agblock_t bno, /* starting block of extent */
201 : xfs_extlen_t len, /* length of extent */
202 : int *stat) /* success/failure */
203 : {
204 468434934 : int error;
205 468434934 : cur->bc_rec.a.ar_startblock = bno;
206 468434934 : cur->bc_rec.a.ar_blockcount = len;
207 383564800 : error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
208 468434455 : cur->bc_ag.abt.active = (*stat == 1);
209 468434455 : return error;
210 : }
211 :
212 : static inline bool
213 : xfs_alloc_cur_active(
214 : struct xfs_btree_cur *cur)
215 : {
216 9167794351 : return cur && cur->bc_ag.abt.active;
217 : }
218 :
219 : /*
220 : * Update the record referred to by cur to the value given
221 : * by [bno, len].
222 : * This either works (return 0) or gets an EFSCORRUPTED error.
223 : */
224 : STATIC int /* error */
225 67266676 : xfs_alloc_update(
226 : struct xfs_btree_cur *cur, /* btree cursor */
227 : xfs_agblock_t bno, /* starting block of extent */
228 : xfs_extlen_t len) /* length of extent */
229 : {
230 67266676 : union xfs_btree_rec rec;
231 :
232 67266676 : rec.alloc.ar_startblock = cpu_to_be32(bno);
233 67266676 : rec.alloc.ar_blockcount = cpu_to_be32(len);
234 67266676 : return xfs_btree_update(cur, &rec);
235 : }
236 :
237 : /* Convert the ondisk btree record to its incore representation. */
238 : void
239 6693932717 : xfs_alloc_btrec_to_irec(
240 : const union xfs_btree_rec *rec,
241 : struct xfs_alloc_rec_incore *irec)
242 : {
243 6693932717 : irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
244 6693932717 : irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
245 6693932717 : }
246 :
247 : inline xfs_failaddr_t
248 6737538018 : xfs_alloc_check_perag_irec(
249 : struct xfs_perag *pag,
250 : const struct xfs_alloc_rec_incore *irec)
251 : {
252 6737538018 : if (irec->ar_blockcount == 0)
253 0 : return __this_address;
254 :
255 : /* check for valid extent range, including overflow */
256 6737538018 : if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
257 0 : return __this_address;
258 :
259 : return NULL;
260 : }
261 :
262 : /* Simple checks for free space records. */
263 : xfs_failaddr_t
264 383153224 : xfs_alloc_check_irec(
265 : struct xfs_btree_cur *cur,
266 : const struct xfs_alloc_rec_incore *irec)
267 : {
268 383153224 : return xfs_alloc_check_perag_irec(cur->bc_ag.pag, irec);
269 : }
270 :
/*
 * Log a corruption warning for a bad free space record, mark the btree
 * sick, and return -EFSCORRUPTED so the caller fails the operation.
 */
static inline int
xfs_alloc_complain_bad_rec(
	struct xfs_btree_cur	*cur,
	xfs_failaddr_t		fa,
	const struct xfs_alloc_rec_incore *irec)
{
	struct xfs_mount	*mp = cur->bc_mp;

	xfs_warn(mp,
		"%s Freespace BTree record corruption in AG %d detected at %pS!",
		cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
		cur->bc_ag.pag->pag_agno, fa);
	xfs_warn(mp,
		"start block 0x%x block count 0x%x", irec->ar_startblock,
		irec->ar_blockcount);
	xfs_btree_mark_sick(cur);
	return -EFSCORRUPTED;
}
289 :
290 : /*
291 : * Get the data from the pointed-to record.
292 : */
int					/* error */
xfs_alloc_get_rec(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		*bno,	/* output: starting block of extent */
	xfs_extlen_t		*len,	/* output: length of extent */
	int			*stat)	/* output: success/failure */
{
	struct xfs_alloc_rec_incore irec;
	union xfs_btree_rec	*rec;
	xfs_failaddr_t		fa;
	int			error;

	error = xfs_btree_get_rec(cur, &rec, stat);
	if (error || !(*stat))
		return error;

	/* Convert to incore form and sanity check before handing it out. */
	xfs_alloc_btrec_to_irec(rec, &irec);
	fa = xfs_alloc_check_irec(cur, &irec);
	if (fa)
		return xfs_alloc_complain_bad_rec(cur, fa, &irec);

	*bno = irec.ar_startblock;
	*len = irec.ar_blockcount;
	return 0;
}
318 :
319 : /*
320 : * Compute aligned version of the found extent.
321 : * Takes alignment and min length into account.
322 : */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		/* A fully-misaligned extent yields zero usable length. */
		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}

	/* Returns true if busy extents were trimmed from the range. */
	return busy;
}
366 :
367 : /*
368 : * Compute best start block and diff for "near" allocations.
369 : * freelen >= wantlen already checked by caller.
370 : */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = datatype & XFS_ALLOC_USERDATA;

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		/*
		 * Target lies inside the free extent and alignment matters:
		 * try the aligned candidates at/above and below wantbno,
		 * keeping the one with the longer (then closer) result.
		 */
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		/* Unaligned and the target fits: take it exactly. */
		newbno1 = wantbno;
	} else if (alignment > 1) {
		/* Free extent ends before wantend: align a tail allocation. */
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		/* No alignment constraint: take the tail of the free extent. */
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
435 :
436 : /*
437 : * Fix up the length, based on mod and prod.
438 : * len should be k * prod + mod for some k.
439 : * If len is too small it is returned unchanged.
440 : * If len hits maxlen it is left alone.
441 : */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	/* Nothing to fix without a prod factor, or when already at maxlen. */
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	/* Shrink rlen to the largest value <= rlen with rlen % prod == mod. */
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
472 :
473 : /*
474 : * Update the two btrees, logically removing from freespace the extent
475 : * starting at rbno, rlen blocks. The extent is contained within the
476 : * actual (current) free extent fbno for flen blocks.
477 : * Flags are passed in indicating whether the cursors are set to the
478 : * relevant records.
479 : */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	struct xfs_btree_cur *cnt_cur,	/* cursor for by-size btree */
	struct xfs_btree_cur *bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		/* Cursor already positioned; verify it matches [fbno, flen]. */
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		/* Cursor already positioned; verify it matches [fbno, flen]. */
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}

#ifdef DEBUG
	/*
	 * Single-level trees must hold the same number of records: every
	 * free extent appears exactly once in each tree.
	 */
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);

		if (XFS_IS_CORRUPT(mp,
				   bnoblock->bb_numrecs !=
				   cntblock->bb_numrecs)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;	/* exact match: nothing left */
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;		/* remainder at the end */
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;			/* remainder at the start */
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;			/* remainders on both sides */
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cnt_cur);
		return -EFSCORRUPTED;
	}
	/*
	 * Add new by-size btree entry(s).
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}
	return 0;
}
664 :
665 : /*
666 : * We do not verify the AGFL contents against AGF-based index counters here,
667 : * even though we may have access to the perag that contains shadow copies. We
668 : * don't know if the AGF based counters have been checked, and if they have they
669 : * still may be inconsistent because they haven't yet been reset on the first
670 : * allocation after the AGF has been read in.
671 : *
672 : * This means we can only check that all agfl entries contain valid or null
673 : * values because we can't reliably determine the active range to exclude
674 : * NULLAGBNO as a valid value.
675 : *
676 : * However, we can't even do that for v4 format filesystems because there are
677 : * old versions of mkfs out there that do not initialise the AGFL to known,
678 : * verifiable values. Hence we can't tell the difference between an AGFL block
679 : * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
680 : *
681 : * As a result, we can only fully validate AGFL block numbers when we pull them
682 : * from the freelist in xfs_alloc_get_freelist().
683 : */
static xfs_failaddr_t
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	__be32		*agfl_bno = xfs_buf_to_agfl_bno(bp);
	int		i;

	/* v4 AGFLs cannot be verified; see the comment above. */
	if (!xfs_has_crc(mp))
		return NULL;

	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
		return __this_address;
	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	/* Every slot must be NULLAGBLOCK or a block number inside the AG. */
	for (i = 0; i < xfs_agfl_size(mp); i++) {
		if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return __this_address;
	}

	/* Reject buffers whose LSN fails the log's sanity check. */
	if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
		return __this_address;
	return NULL;
}
719 :
720 : static void
721 63394 : xfs_agfl_read_verify(
722 : struct xfs_buf *bp)
723 : {
724 63394 : struct xfs_mount *mp = bp->b_mount;
725 63394 : xfs_failaddr_t fa;
726 :
727 : /*
728 : * There is no verification of non-crc AGFLs because mkfs does not
729 : * initialise the AGFL to zero or NULL. Hence the only valid part of the
730 : * AGFL is what the AGF says is active. We can't get to the AGF, so we
731 : * can't verify just those entries are valid.
732 : */
733 63394 : if (!xfs_has_crc(mp))
734 : return;
735 :
736 63376 : if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
737 128 : xfs_verifier_error(bp, -EFSBADCRC, __this_address);
738 : else {
739 63248 : fa = xfs_agfl_verify(bp);
740 63248 : if (fa)
741 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
742 : }
743 : }
744 :
/* Write verifier: validate structure, stamp the LSN, recompute the CRC. */
static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	xfs_failaddr_t		fa;

	/* no verification of non-crc AGFLs */
	if (!xfs_has_crc(mp))
		return;

	fa = xfs_agfl_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	/* Record the last-modification LSN when the buffer is logged. */
	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}
768 :
/* Buffer ops for AGFL blocks: CRC and structure checks on read and write. */
const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};
776 :
777 : /*
778 : * Read in the allocation group free block array.
779 : */
int
xfs_alloc_read_agfl(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		**bpp)
{
	struct xfs_mount	*mp = pag->pag_mount;
	struct xfs_buf		*bp;
	int			error;

	error = xfs_trans_read_buf(
			mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
	/* Verifier failures mark the AGFL metadata sick for this AG. */
	if (xfs_metadata_is_sick(error))
		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
	if (error)
		return error;
	/* Set the buffer's cache reference hint for AGFL blocks. */
	xfs_buf_set_ref(bp, XFS_AGFL_REF);
	*bpp = bp;
	return 0;
}
802 :
/*
 * Apply a (signed) delta of 'len' blocks to the AGF free-block count and
 * its in-core perag copy, then log the AGF change.  Returns -EFSCORRUPTED
 * if the count would exceed the AG length.
 */
STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = agbp->b_addr;

	agbp->b_pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length))) {
		xfs_buf_mark_corrupt(agbp);
		xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF);
		return -EFSCORRUPTED;
	}

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}
824 :
825 : /*
826 : * Block allocation algorithm and data structures.
827 : */
struct xfs_alloc_cur {
	struct xfs_btree_cur	*cnt;	/* by-size (cntbt) cursor */
	struct xfs_btree_cur	*bnolt;	/* bnobt leftward search cursor */
	struct xfs_btree_cur	*bnogt;	/* bnobt rightward search cursor */
	xfs_extlen_t		cur_len;/* current search length */
	xfs_agblock_t		rec_bno;/* extent startblock */
	xfs_extlen_t		rec_len;/* extent length */
	xfs_agblock_t		bno;	/* alloc bno */
	xfs_extlen_t		len;	/* alloc len */
	xfs_extlen_t		diff;	/* diff from search bno */
	unsigned int		busy_gen;/* busy state */
	bool			busy;	/* candidate overlapped busy extents */
};
841 :
842 : /*
843 : * Set up cursors, etc. in the extent allocation cursor. This function can be
844 : * called multiple times to reset an initialized structure without having to
845 : * reallocate cursors.
846 : */
static int
xfs_alloc_cur_setup(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	int			error;
	int			i;

	/* Reset the best-candidate tracking state for a fresh search. */
	acur->cur_len = args->maxlen;
	acur->rec_bno = 0;
	acur->rec_len = 0;
	acur->bno = 0;
	acur->len = 0;
	acur->diff = -1;
	acur->busy = false;
	acur->busy_gen = 0;

	/*
	 * Perform an initial cntbt lookup to check for availability of maxlen
	 * extents. If this fails, we'll return -ENOSPC to signal the caller to
	 * attempt a small allocation.
	 */
	if (!acur->cnt)
		acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_CNT);
	error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
	if (error)
		return error;

	/*
	 * Allocate the bnobt left and right search cursors.
	 */
	if (!acur->bnolt)
		acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	if (!acur->bnogt)
		acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	/* No maxlen-sized extent found: let the caller try a smaller one. */
	return i == 1 ? 0 : -ENOSPC;
}
887 :
888 : static void
889 55333128 : xfs_alloc_cur_close(
890 : struct xfs_alloc_cur *acur,
891 : bool error)
892 : {
893 55333128 : int cur_error = XFS_BTREE_NOERROR;
894 :
895 55333128 : if (error)
896 700 : cur_error = XFS_BTREE_ERROR;
897 :
898 55333128 : if (acur->cnt)
899 55333128 : xfs_btree_del_cursor(acur->cnt, cur_error);
900 55333784 : if (acur->bnolt)
901 55333440 : xfs_btree_del_cursor(acur->bnolt, cur_error);
902 55333870 : if (acur->bnogt)
903 55333526 : xfs_btree_del_cursor(acur->bnogt, cur_error);
904 55333910 : acur->cnt = acur->bnolt = acur->bnogt = NULL;
905 55333910 : }
906 :
907 : /*
908 : * Check an extent for allocation and track the best available candidate in the
909 : * allocation structure. The cursor is deactivated if it has entered an out of
 * range state based on allocation arguments. Optionally return the extent
 * geometry and allocation status if requested by the caller.
912 : */
static int
xfs_alloc_cur_check(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	struct xfs_btree_cur	*cur,
	int			*new)	/* set to 1 if a new best was recorded */
{
	int			error, i;
	xfs_agblock_t		bno, bnoa, bnew;
	xfs_extlen_t		len, lena, diff = -1;
	bool			busy;
	unsigned		busy_gen = 0;
	bool			deactivate = false;
	bool			isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;

	*new = 0;

	/* Fetch the freespace record under the cursor. */
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	/*
	 * Check minlen and deactivate a cntbt cursor if out of acceptable size
	 * range (i.e., walking backwards looking for a minlen extent).
	 */
	if (len < args->minlen) {
		deactivate = !isbnobt;
		goto out;
	}

	/* Trim the record against busy extents and alignment requirements. */
	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
					 &busy_gen);
	acur->busy |= busy;
	if (busy)
		acur->busy_gen = busy_gen;
	/* deactivate a bnobt cursor outside of locality range */
	if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
		deactivate = isbnobt;
		goto out;
	}
	/* Too small once aligned/trimmed: not a candidate, but keep walking. */
	if (lena < args->minlen)
		goto out;

	args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
	xfs_alloc_fix_len(args);
	ASSERT(args->len >= args->minlen);
	/* A shorter extent than the current best can never win. */
	if (args->len < acur->len)
		goto out;

	/*
	 * We have an aligned record that satisfies minlen and beats or matches
	 * the candidate extent size. Compare locality for near allocation mode.
	 */
	diff = xfs_alloc_compute_diff(args->agbno, args->len,
				      args->alignment, args->datatype,
				      bnoa, lena, &bnew);
	if (bnew == NULLAGBLOCK)
		goto out;

	/*
	 * Deactivate a bnobt cursor with worse locality than the current best.
	 */
	if (diff > acur->diff) {
		deactivate = isbnobt;
		goto out;
	}

	/* Record the new best candidate extent. */
	ASSERT(args->len > acur->len ||
	       (args->len == acur->len && diff <= acur->diff));
	acur->rec_bno = bno;
	acur->rec_len = len;
	acur->bno = bnew;
	acur->len = args->len;
	acur->diff = diff;
	*new = 1;

	/*
	 * We're done if we found a perfect allocation. This only deactivates
	 * the current cursor, but this is just an optimization to terminate a
	 * cntbt search that otherwise runs to the edge of the tree.
	 */
	if (acur->diff == 0 && acur->len == args->maxlen)
		deactivate = true;
out:
	if (deactivate)
		cur->bc_ag.abt.active = false;
	trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
				  *new);
	return 0;
}
1007 :
1008 : /*
1009 : * Complete an allocation of a candidate extent. Remove the extent from both
1010 : * trees and update the args structure.
1011 : */
1012 : STATIC int
1013 55332031 : xfs_alloc_cur_finish(
1014 : struct xfs_alloc_arg *args,
1015 : struct xfs_alloc_cur *acur)
1016 : {
1017 55332031 : struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
1018 55332031 : int error;
1019 :
1020 55332031 : ASSERT(acur->cnt && acur->bnolt);
1021 55332031 : ASSERT(acur->bno >= acur->rec_bno);
1022 55332031 : ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
1023 110664062 : ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));
1024 :
1025 55332031 : error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
1026 : acur->rec_len, acur->bno, acur->len, 0);
1027 55333273 : if (error)
1028 : return error;
1029 :
1030 55333166 : args->agbno = acur->bno;
1031 55333166 : args->len = acur->len;
1032 55333166 : args->wasfromfl = 0;
1033 :
1034 55333166 : trace_xfs_alloc_cur(args);
1035 55333166 : return 0;
1036 : }
1037 :
1038 : /*
1039 : * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
1040 : * bno optimized lookup to search for extents with ideal size and locality.
1041 : */
STATIC int
xfs_alloc_cntbt_iter(
	struct xfs_alloc_arg		*args,
	struct xfs_alloc_cur		*acur)
{
	struct xfs_btree_cur	*cur = acur->cnt;
	xfs_agblock_t		bno;
	xfs_extlen_t		len, cur_len;
	int			error;
	int			i;

	/* Nothing to do once the cntbt cursor has been deactivated. */
	if (!xfs_alloc_cur_active(cur))
		return 0;

	/* locality optimized lookup */
	cur_len = acur->cur_len;
	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
	if (error)
		return error;
	if (i == 0)
		return 0;
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;

	/* check the current record and update search length from it */
	error = xfs_alloc_cur_check(args, acur, cur, &i);
	if (error)
		return error;
	ASSERT(len >= acur->cur_len);
	acur->cur_len = len;

	/*
	 * We looked up the first record >= [agbno, len] above. The agbno is a
	 * secondary key and so the current record may lie just before or after
	 * agbno. If it is past agbno, check the previous record too so long as
	 * the length matches as it may be closer. Don't check a smaller record
	 * because that could deactivate our cursor.
	 */
	if (bno > args->agbno) {
		error = xfs_btree_decrement(cur, 0, &i);
		if (!error && i) {
			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
			if (!error && i && len == acur->cur_len)
				error = xfs_alloc_cur_check(args, acur, cur,
							    &i);
		}
		if (error)
			return error;
	}

	/*
	 * Increment the search key until we find at least one allocation
	 * candidate or if the extent we found was larger. Otherwise, double the
	 * search key to optimize the search. Efficiency is more important here
	 * than absolute best locality.
	 */
	cur_len <<= 1;
	if (!acur->len || acur->cur_len >= cur_len)
		acur->cur_len++;
	else
		acur->cur_len = cur_len;

	return error;
}
1107 :
1108 : /*
1109 : * Deal with the case where only small freespaces remain. Either return the
1110 : * contents of the last freespace record, or allocate space from the freelist if
1111 : * there is nothing in the tree.
1112 : */
1113 : STATIC int /* error */
1114 87078 : xfs_alloc_ag_vextent_small(
1115 : struct xfs_alloc_arg *args, /* allocation argument structure */
1116 : struct xfs_btree_cur *ccur, /* optional by-size cursor */
1117 : xfs_agblock_t *fbnop, /* result block number */
1118 : xfs_extlen_t *flenp, /* result length */
1119 : int *stat) /* status: 0-freelist, 1-normal/none */
1120 : {
1121 87078 : struct xfs_agf *agf = args->agbp->b_addr;
1122 87078 : int error = 0;
1123 87078 : xfs_agblock_t fbno = NULLAGBLOCK;
1124 87078 : xfs_extlen_t flen = 0;
1125 87078 : int i = 0;
1126 :
1127 : /*
1128 : * If a cntbt cursor is provided, try to allocate the largest record in
1129 : * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
1130 : * allocation. Make sure to respect minleft even when pulling from the
1131 : * freelist.
1132 : */
1133 87078 : if (ccur)
1134 87078 : error = xfs_btree_decrement(ccur, 0, &i);
1135 87078 : if (error)
1136 0 : goto error;
1137 87078 : if (i) {
1138 87078 : error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1139 87078 : if (error)
1140 0 : goto error;
1141 87078 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1142 0 : xfs_btree_mark_sick(ccur);
1143 0 : error = -EFSCORRUPTED;
1144 0 : goto error;
1145 : }
1146 87078 : goto out;
1147 : }
1148 :
1149 0 : if (args->minlen != 1 || args->alignment != 1 ||
1150 0 : args->resv == XFS_AG_RESV_AGFL ||
1151 0 : be32_to_cpu(agf->agf_flcount) <= args->minleft)
1152 0 : goto out;
1153 :
1154 0 : error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
1155 : &fbno, 0);
1156 0 : if (error)
1157 0 : goto error;
1158 0 : if (fbno == NULLAGBLOCK)
1159 0 : goto out;
1160 :
1161 0 : xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
1162 0 : (args->datatype & XFS_ALLOC_NOBUSY));
1163 :
1164 0 : if (args->datatype & XFS_ALLOC_USERDATA) {
1165 0 : struct xfs_buf *bp;
1166 :
1167 0 : error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1168 0 : XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1169 : args->mp->m_bsize, 0, &bp);
1170 0 : if (error)
1171 0 : goto error;
1172 0 : xfs_trans_binval(args->tp, bp);
1173 : }
1174 0 : *fbnop = args->agbno = fbno;
1175 0 : *flenp = args->len = 1;
1176 0 : if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
1177 0 : xfs_btree_mark_sick(ccur);
1178 0 : error = -EFSCORRUPTED;
1179 0 : goto error;
1180 : }
1181 0 : args->wasfromfl = 1;
1182 0 : trace_xfs_alloc_small_freelist(args);
1183 :
1184 : /*
1185 : * If we're feeding an AGFL block to something that doesn't live in the
1186 : * free space, we need to clear out the OWN_AG rmap.
1187 : */
1188 0 : error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
1189 : &XFS_RMAP_OINFO_AG);
1190 0 : if (error)
1191 0 : goto error;
1192 :
1193 0 : *stat = 0;
1194 0 : return 0;
1195 :
1196 87078 : out:
1197 : /*
1198 : * Can't do the allocation, give up.
1199 : */
1200 87078 : if (flen < args->minlen) {
1201 0 : args->agbno = NULLAGBLOCK;
1202 0 : trace_xfs_alloc_small_notenough(args);
1203 0 : flen = 0;
1204 : }
1205 87078 : *fbnop = fbno;
1206 87078 : *flenp = flen;
1207 87078 : *stat = 1;
1208 87078 : trace_xfs_alloc_small_done(args);
1209 87078 : return 0;
1210 :
1211 0 : error:
1212 0 : trace_xfs_alloc_small_error(args);
1213 0 : return error;
1214 : }
1215 :
1216 : /*
1217 : * Allocate a variable extent at exactly agno/bno.
1218 : * Extent's length (returned in *len) will be between minlen and maxlen,
1219 : * and of the form k * prod + mod unless there's nothing that large.
1220 : * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
1221 : */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t	*args)	/* allocation argument structure */
{
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	struct xfs_btree_cur *bno_cur;/* by block-number btree cursor */
	struct xfs_btree_cur *cnt_cur;/* by count btree cursor */
	int		error;
	xfs_agblock_t	fbno;	/* start block of found extent */
	xfs_extlen_t	flen;	/* length of found extent */
	xfs_agblock_t	tbno;	/* start block of busy extent */
	xfs_extlen_t	tlen;	/* length of busy extent */
	xfs_agblock_t	tend;	/* end block of busy extent */
	int		i;	/* success/failure of operation */
	unsigned	busy_gen;

	/* Exact allocations cannot honor an alignment requirement. */
	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		xfs_btree_mark_sick(bno_cur);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents.
	 */
	tbno = fbno;
	tlen = flen;
	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}
1333 :
1334 : /*
1335 : * Search a given number of btree records in a given direction. Check each
1336 : * record against the good extent we've already found.
1337 : */
1338 : STATIC int
1339 516593677 : xfs_alloc_walk_iter(
1340 : struct xfs_alloc_arg *args,
1341 : struct xfs_alloc_cur *acur,
1342 : struct xfs_btree_cur *cur,
1343 : bool increment,
1344 : bool find_one, /* quit on first candidate */
1345 : int count, /* rec count (-1 for infinite) */
1346 : int *stat)
1347 : {
1348 516593677 : int error;
1349 516593677 : int i;
1350 :
1351 516593677 : *stat = 0;
1352 :
1353 : /*
1354 : * Search so long as the cursor is active or we find a better extent.
1355 : * The cursor is deactivated if it extends beyond the range of the
1356 : * current allocation candidate.
1357 : */
1358 8899143180 : while (xfs_alloc_cur_active(cur) && count) {
1359 3969595154 : error = xfs_alloc_cur_check(args, acur, cur, &i);
1360 3978241639 : if (error)
1361 0 : return error;
1362 3978241639 : if (i == 1) {
1363 358970425 : *stat = 1;
1364 358970425 : if (find_one)
1365 : break;
1366 : }
1367 7902938070 : if (!xfs_alloc_cur_active(cur))
1368 : break;
1369 :
1370 3941242747 : if (increment)
1371 3470639029 : error = xfs_btree_increment(cur, 0, &i);
1372 : else
1373 470603718 : error = xfs_btree_decrement(cur, 0, &i);
1374 3932977920 : if (error)
1375 7 : return error;
1376 3932977913 : if (i == 0)
1377 23293853 : cur->bc_ag.abt.active = false;
1378 :
1379 3932977913 : if (count > 0)
1380 416049115 : count--;
1381 : }
1382 :
1383 : return 0;
1384 : }
1385 :
1386 : /*
1387 : * Search the by-bno and by-size btrees in parallel in search of an extent with
1388 : * ideal locality based on the NEAR mode ->agbno locality hint.
1389 : */
STATIC int
xfs_alloc_ag_vextent_locality(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	int			*stat)	/* 1 if a candidate extent was found */
{
	struct xfs_btree_cur	*fbcur = NULL;
	int			error;
	int			i;
	bool			fbinc;	/* direction for the fbcur walk; set
					 * whenever fbcur is set */

	ASSERT(acur->len == 0);

	*stat = 0;

	/* Position all three cursors around the locality hint. */
	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
	if (error)
		return error;

	/*
	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
	 * right and lookup the closest extent to the locality hint for each
	 * extent size key in the cntbt. The entire search terminates
	 * immediately on a bnobt hit because that means we've found best case
	 * locality. Otherwise the search continues until the cntbt cursor runs
	 * off the end of the tree. If no allocation candidate is found at this
	 * point, give up on locality, walk backwards from the end of the cntbt
	 * and take the first available extent.
	 *
	 * The parallel tree searches balance each other out to provide fairly
	 * consistent performance for various situations. The bnobt search can
	 * have pathological behavior in the worst case scenario of larger
	 * allocation requests and fragmented free space. On the other hand, the
	 * bnobt is able to satisfy most smaller allocation requests much more
	 * quickly than the cntbt. The cntbt search can sift through fragmented
	 * free space and sets of free extents for larger allocation requests
	 * more quickly than the bnobt. Since the locality hint is just a hint
	 * and we don't want to scan the entire bnobt for perfect locality, the
	 * cntbt search essentially bounds the bnobt search such that we can
	 * find good enough locality at reasonable performance in most cases.
	 */
	while (xfs_alloc_cur_active(acur->bnolt) ||
	       xfs_alloc_cur_active(acur->bnogt) ||
	       xfs_alloc_cur_active(acur->cnt)) {

		trace_xfs_alloc_cur_lookup(args);

		/*
		 * Search the bnobt left and right. In the case of a hit, finish
		 * the search in the opposite direction and we're done.
		 */
		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
					    true, 1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_left(args);
			fbcur = acur->bnogt;
			fbinc = true;
			break;
		}
		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
					    1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_right(args);
			fbcur = acur->bnolt;
			fbinc = false;
			break;
		}

		/*
		 * Check the extent with best locality based on the current
		 * extent size search key and keep track of the best candidate.
		 */
		error = xfs_alloc_cntbt_iter(args, acur);
		if (error)
			return error;
		if (!xfs_alloc_cur_active(acur->cnt)) {
			trace_xfs_alloc_cur_lookup_done(args);
			break;
		}
	}

	/*
	 * If we failed to find anything due to busy extents, return empty
	 * handed so the caller can flush and retry. If no busy extents were
	 * found, walk backwards from the end of the cntbt as a last resort.
	 */
	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
		error = xfs_btree_decrement(acur->cnt, 0, &i);
		if (error)
			return error;
		if (i) {
			/* Reactivate the cursor for the last-resort walk. */
			acur->cnt->bc_ag.abt.active = true;
			fbcur = acur->cnt;
			fbinc = false;
		}
	}

	/*
	 * Search in the opposite direction for a better entry in the case of
	 * a bnobt hit or walk backwards from the end of the cntbt.
	 */
	if (fbcur) {
		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
					    &i);
		if (error)
			return error;
	}

	if (acur->len)
		*stat = 1;

	return 0;
}
1513 :
1514 : /* Check the last block of the cnt btree for allocations. */
static int
xfs_alloc_ag_vextent_lastblock(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	xfs_agblock_t		*bno,
	xfs_extlen_t		*len,
	bool			*allocated)	/* true if an extent was taken */
{
	int			error;
	int			i;

#ifdef DEBUG
	/* Randomly don't execute the first algorithm. */
	if (get_random_u32_below(2))
		return 0;
#endif

	/*
	 * Start from the entry that lookup found, sequence through all larger
	 * free blocks. If we're actually pointing at a record smaller than
	 * maxlen, go to the start of this block, and skip all those smaller
	 * than minlen.
	 */
	if (*len || args->alignment > 1) {
		/* Rewind to the first record of the last (leaf) block. */
		acur->cnt->bc_levels[0].ptr = 1;
		do {
			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
			if (error)
				return error;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				xfs_btree_mark_sick(acur->cnt);
				return -EFSCORRUPTED;
			}
			if (*len >= args->minlen)
				break;
			error = xfs_btree_increment(acur->cnt, 0, &i);
			if (error)
				return error;
		} while (i);
		ASSERT(*len >= args->minlen);
		/* Ran off the end without finding a minlen record. */
		if (!i)
			return 0;
	}

	/* Check every remaining record in the block for the best candidate. */
	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
	if (error)
		return error;

	/*
	 * It didn't work. We COULD be in a case where there's a good record
	 * somewhere, so try again.
	 */
	if (acur->len == 0)
		return 0;

	trace_xfs_alloc_near_first(args);
	*allocated = true;
	return 0;
}
1574 :
1575 : /*
1576 : * Allocate a variable extent near bno in the allocation group agno.
1577 : * Extent's length (returned in len) will be between minlen and maxlen,
1578 : * and of the form k * prod + mod unless there's nothing that large.
1579 : * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1580 : */
STATIC int
xfs_alloc_ag_vextent_near(
	struct xfs_alloc_arg	*args,
	uint32_t		alloc_flags)
{
	struct xfs_alloc_cur	acur = {};
	int			error;		/* error code */
	int			i;		/* result code, temporary */
	xfs_agblock_t		bno;
	xfs_extlen_t		len;

	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

	/* Retry once quickly if we find busy extents before blocking. */
	alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
restart:
	len = 0;

	/*
	 * Set up cursors and see if there are any free extents as big as
	 * maxlen. If not, pick the last entry in the tree unless the tree is
	 * empty.
	 */
	error = xfs_alloc_cur_setup(args, &acur);
	if (error == -ENOSPC) {
		/* No maxlen extents; settle for the largest small extent. */
		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
				&len, &i);
		if (error)
			goto out;
		if (i == 0 || len == 0) {
			trace_xfs_alloc_near_noentry(args);
			goto out;
		}
		ASSERT(i == 1);
	} else if (error) {
		goto out;
	}

	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree. If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 */
	if (xfs_btree_islastblock(acur.cnt, 0)) {
		bool		allocated = false;

		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
				&allocated);
		if (error)
			goto out;
		if (allocated)
			goto alloc_finish;
	}

	/*
	 * Second algorithm. Combined cntbt and bnobt search to find ideal
	 * locality.
	 */
	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
	if (error)
		goto out;

	/*
	 * If we couldn't get anything, give up.
	 */
	if (!acur.len) {
		if (acur.busy) {
			/*
			 * Our only valid extents must have been busy. Flush and
			 * retry the allocation again. If we get an -EAGAIN
			 * error, we're being told that a deadlock was avoided
			 * and the current transaction needs committing before
			 * the allocation can be retried.
			 */
			trace_xfs_alloc_near_busy(args);
			error = xfs_extent_busy_flush(args->tp, args->pag,
					acur.busy_gen, alloc_flags);
			if (error)
				goto out;

			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		goto out;
	}

alloc_finish:
	/* fix up btrees on a successful allocation */
	error = xfs_alloc_cur_finish(args, &acur);

out:
	/* Tear down cursors; pass the error state so buffers unwind cleanly. */
	xfs_alloc_cur_close(&acur, error);
	return error;
}
1689 :
1690 : /*
1691 : * Allocate a variable extent anywhere in the allocation group agno.
1692 : * Extent's length (returned in len) will be between minlen and maxlen,
1693 : * and of the form k * prod + mod unless there's nothing that large.
1694 : * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1695 : */
1696 : static int
1697 2886321 : xfs_alloc_ag_vextent_size(
1698 : struct xfs_alloc_arg *args,
1699 : uint32_t alloc_flags)
1700 : {
1701 2886321 : struct xfs_agf *agf = args->agbp->b_addr;
1702 2886321 : struct xfs_btree_cur *bno_cur;
1703 2886321 : struct xfs_btree_cur *cnt_cur;
1704 2886321 : xfs_agblock_t fbno; /* start of found freespace */
1705 2886321 : xfs_extlen_t flen; /* length of found freespace */
1706 2886321 : xfs_agblock_t rbno; /* returned block number */
1707 2886321 : xfs_extlen_t rlen; /* length of returned extent */
1708 2886321 : bool busy;
1709 2886321 : unsigned busy_gen;
1710 2886321 : int error;
1711 2886321 : int i;
1712 :
1713 : /* Retry once quickly if we find busy extents before blocking. */
1714 2886321 : alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
1715 : restart:
1716 : /*
1717 : * Allocate and initialize a cursor for the by-size btree.
1718 : */
1719 2894804 : cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1720 : args->pag, XFS_BTNUM_CNT);
1721 2894891 : bno_cur = NULL;
1722 :
1723 : /*
1724 : * Look for an entry >= maxlen+alignment-1 blocks.
1725 : */
1726 2894898 : if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1727 2894891 : args->maxlen + args->alignment - 1, &i)))
1728 39 : goto error0;
1729 :
1730 : /*
1731 : * If none then we have to settle for a smaller extent. In the case that
1732 : * there are no large extents, this will return the last entry in the
1733 : * tree unless the tree is empty. In the case that there are only busy
1734 : * large extents, this will return the largest small extent unless there
1735 : * are no smaller extents available.
1736 : */
1737 2894859 : if (!i) {
1738 25829 : error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1739 : &fbno, &flen, &i);
1740 25829 : if (error)
1741 0 : goto error0;
1742 25829 : if (i == 0 || flen == 0) {
1743 0 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1744 0 : trace_xfs_alloc_size_noentry(args);
1745 0 : return 0;
1746 : }
1747 25829 : ASSERT(i == 1);
1748 25829 : busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1749 : &rlen, &busy_gen);
1750 : } else {
1751 : /*
1752 : * Search for a non-busy extent that is large enough.
1753 : */
1754 363985624 : for (;;) {
1755 183427327 : error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1756 183441289 : if (error)
1757 0 : goto error0;
1758 183441289 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1759 0 : xfs_btree_mark_sick(cnt_cur);
1760 0 : error = -EFSCORRUPTED;
1761 0 : goto error0;
1762 : }
1763 :
1764 183441289 : busy = xfs_alloc_compute_aligned(args, fbno, flen,
1765 : &rbno, &rlen, &busy_gen);
1766 :
1767 183442226 : if (rlen >= args->maxlen)
1768 : break;
1769 :
1770 180581535 : error = xfs_btree_increment(cnt_cur, 0, &i);
1771 180566625 : if (error)
1772 0 : goto error0;
1773 180566625 : if (i)
1774 180558297 : continue;
1775 :
1776 : /*
1777 : * Our only valid extents must have been busy. Flush and
1778 : * retry the allocation again. If we get an -EAGAIN
1779 : * error, we're being told that a deadlock was avoided
1780 : * and the current transaction needs committing before
1781 : * the allocation can be retried.
1782 : */
1783 8328 : trace_xfs_alloc_size_busy(args);
1784 8328 : error = xfs_extent_busy_flush(args->tp, args->pag,
1785 : busy_gen, alloc_flags);
1786 8328 : if (error)
1787 0 : goto error0;
1788 :
1789 8328 : alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
1790 8328 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1791 8328 : goto restart;
1792 : }
1793 : }
1794 :
1795 : /*
1796 : * In the first case above, we got the last entry in the
1797 : * by-size btree. Now we check to see if the space hits maxlen
1798 : * once aligned; if not, we search left for something better.
1799 : * This can't happen in the second case above.
1800 : */
1801 2886504 : rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1802 2886504 : if (XFS_IS_CORRUPT(args->mp,
1803 : rlen != 0 &&
1804 : (rlen > flen ||
1805 : rbno + rlen > fbno + flen))) {
1806 0 : xfs_btree_mark_sick(cnt_cur);
1807 0 : error = -EFSCORRUPTED;
1808 0 : goto error0;
1809 : }
1810 2886504 : if (rlen < args->maxlen) {
1811 25829 : xfs_agblock_t bestfbno;
1812 25829 : xfs_extlen_t bestflen;
1813 25829 : xfs_agblock_t bestrbno;
1814 25829 : xfs_extlen_t bestrlen;
1815 :
1816 25829 : bestrlen = rlen;
1817 25829 : bestrbno = rbno;
1818 25829 : bestflen = flen;
1819 25829 : bestfbno = fbno;
1820 3587419 : for (;;) {
1821 3587419 : if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1822 0 : goto error0;
1823 3587419 : if (i == 0)
1824 : break;
1825 3587259 : if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1826 : &i)))
1827 0 : goto error0;
1828 3587259 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1829 0 : xfs_btree_mark_sick(cnt_cur);
1830 0 : error = -EFSCORRUPTED;
1831 0 : goto error0;
1832 : }
1833 3587259 : if (flen < bestrlen)
1834 : break;
1835 3561590 : busy = xfs_alloc_compute_aligned(args, fbno, flen,
1836 : &rbno, &rlen, &busy_gen);
1837 3561590 : rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1838 3561590 : if (XFS_IS_CORRUPT(args->mp,
1839 : rlen != 0 &&
1840 : (rlen > flen ||
1841 : rbno + rlen > fbno + flen))) {
1842 0 : xfs_btree_mark_sick(cnt_cur);
1843 0 : error = -EFSCORRUPTED;
1844 0 : goto error0;
1845 : }
1846 3561590 : if (rlen > bestrlen) {
1847 14989 : bestrlen = rlen;
1848 14989 : bestrbno = rbno;
1849 14989 : bestflen = flen;
1850 14989 : bestfbno = fbno;
1851 14989 : if (rlen == args->maxlen)
1852 : break;
1853 : }
1854 : }
1855 25829 : if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1856 : &i)))
1857 0 : goto error0;
1858 25829 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1859 0 : xfs_btree_mark_sick(cnt_cur);
1860 0 : error = -EFSCORRUPTED;
1861 0 : goto error0;
1862 : }
1863 25829 : rlen = bestrlen;
1864 25829 : rbno = bestrbno;
1865 25829 : flen = bestflen;
1866 25829 : fbno = bestfbno;
1867 : }
1868 2886504 : args->wasfromfl = 0;
1869 : /*
1870 : * Fix up the length.
1871 : */
1872 2886504 : args->len = rlen;
1873 2886504 : if (rlen < args->minlen) {
1874 14985 : if (busy) {
1875 : /*
1876 : * Our only valid extents must have been busy. Flush and
1877 : * retry the allocation again. If we get an -EAGAIN
1878 : * error, we're being told that a deadlock was avoided
1879 : * and the current transaction needs committing before
1880 : * the allocation can be retried.
1881 : */
1882 155 : trace_xfs_alloc_size_busy(args);
1883 155 : error = xfs_extent_busy_flush(args->tp, args->pag,
1884 : busy_gen, alloc_flags);
1885 155 : if (error)
1886 0 : goto error0;
1887 :
1888 155 : alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
1889 155 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1890 155 : goto restart;
1891 : }
1892 14830 : goto out_nominleft;
1893 : }
1894 2871519 : xfs_alloc_fix_len(args);
1895 :
1896 2871507 : rlen = args->len;
1897 2871507 : if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1898 0 : xfs_btree_mark_sick(cnt_cur);
1899 0 : error = -EFSCORRUPTED;
1900 0 : goto error0;
1901 : }
1902 : /*
1903 : * Allocate and initialize a cursor for the by-block tree.
1904 : */
1905 2871507 : bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1906 : args->pag, XFS_BTNUM_BNO);
1907 2871516 : if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1908 : rbno, rlen, XFSA_FIXUP_CNT_OK)))
1909 33 : goto error0;
1910 2871498 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1911 2871492 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1912 2871487 : cnt_cur = bno_cur = NULL;
1913 2871487 : args->len = rlen;
1914 2871487 : args->agbno = rbno;
1915 5742974 : if (XFS_IS_CORRUPT(args->mp,
1916 : args->agbno + args->len >
1917 : be32_to_cpu(agf->agf_length))) {
1918 0 : xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT);
1919 0 : error = -EFSCORRUPTED;
1920 0 : goto error0;
1921 : }
1922 2871487 : trace_xfs_alloc_size_done(args);
1923 2871487 : return 0;
1924 :
1925 72 : error0:
1926 72 : trace_xfs_alloc_size_error(args);
1927 72 : if (cnt_cur)
1928 72 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1929 72 : if (bno_cur)
1930 33 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1931 : return error;
1932 :
1933 : out_nominleft:
1934 14830 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1935 14830 : trace_xfs_alloc_size_nominleft(args);
1936 14830 : args->agbno = NULLAGBLOCK;
1937 14830 : return 0;
1938 : }
1939 :
/*
 * Free the extent starting at agno/bno for length.
 *
 * The freed range is merged with any physically contiguous free space
 * record on either side so the by-bno and by-cnt btrees always describe
 * maximal free extents.  Both btrees, the rmap btree (when the owner info
 * requires it), and the AGF/superblock free space counters are updated in
 * the caller's transaction.
 *
 * Returns 0 on success, -EFSCORRUPTED if the btrees contradict the request
 * (e.g. part of the range is already free), or another negative errno.
 */
STATIC int
xfs_free_ag_extent(
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	xfs_agnumber_t			agno,
	xfs_agblock_t			bno,
	xfs_extlen_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type)
{
	struct xfs_mount		*mp;
	struct xfs_btree_cur		*bno_cur;
	struct xfs_btree_cur		*cnt_cur;
	xfs_agblock_t			gtbno;	/* start of right neighbor */
	xfs_extlen_t			gtlen;	/* length of right neighbor */
	xfs_agblock_t			ltbno;	/* start of left neighbor */
	xfs_extlen_t			ltlen;	/* length of left neighbor */
	xfs_agblock_t			nbno;	/* new starting block of freesp */
	xfs_extlen_t			nlen;	/* new length of freespace */
	int				haveleft; /* have a left neighbor */
	int				haveright; /* have a right neighbor */
	int				i;
	int				error;
	struct xfs_perag		*pag = agbp->b_pag;

	bno_cur = cnt_cur = NULL;
	mp = tp->t_mountp;

	/* Remove the reverse mapping first, unless the owner info says not to. */
	if (!xfs_rmap_should_skip_owner_update(oinfo)) {
		error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
		if (error)
			goto error0;
	}

	/*
	 * Allocate and initialize a cursor for the by-block btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
	/*
	 * Look for a neighboring block on the left (lower block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
		goto error0;
	if (haveleft) {
		/*
		 * There is a block to our left.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * It's not contiguous, though.
		 */
		if (ltbno + ltlen < bno)
			haveleft = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
				xfs_btree_mark_sick(bno_cur);
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
	}
	/*
	 * Look for a neighboring block on the right (higher block numbers)
	 * that is contiguous with this space.
	 */
	if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
		goto error0;
	if (haveright) {
		/*
		 * There is a block to our right.
		 */
		if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * It's not contiguous, though.
		 */
		if (bno + len < gtbno)
			haveright = 0;
		else {
			/*
			 * If this failure happens the request to free this
			 * space was invalid, it's (partly) already free.
			 * Very bad.
			 */
			if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
				xfs_btree_mark_sick(bno_cur);
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
	}
	/*
	 * Now allocate and initialize a cursor for the by-size tree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
	/*
	 * Have both left and right contiguous neighbors.
	 * Merge all three into a single free block.
	 */
	if (haveleft && haveright) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Delete the old by-block entry for the right block.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Move the by-block cursor back to the left neighbor.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
#ifdef DEBUG
		/*
		 * Check that this is the right record: delete didn't
		 * mangle the cursor.
		 */
		{
			xfs_agblock_t	xxbno;
			xfs_extlen_t	xxlen;

			if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
					&i)))
				goto error0;
			if (XFS_IS_CORRUPT(mp,
					   i != 1 ||
					   xxbno != ltbno ||
					   xxlen != ltlen)) {
				xfs_btree_mark_sick(bno_cur);
				error = -EFSCORRUPTED;
				goto error0;
			}
		}
#endif
		/*
		 * Update remaining by-block entry to the new, joined block.
		 */
		nbno = ltbno;
		nlen = len + ltlen + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a left contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveleft) {
		/*
		 * Delete the old by-size entry on the left.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Back up the by-block cursor to the left neighbor, and
		 * update its length.
		 */
		if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		nbno = ltbno;
		nlen = len + ltlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * Have only a right contiguous neighbor.
	 * Merge it together with the new freespace.
	 */
	else if (haveright) {
		/*
		 * Delete the old by-size entry on the right.
		 */
		if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		if ((error = xfs_btree_delete(cnt_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
		/*
		 * Update the starting block and length of the right
		 * neighbor in the by-block tree.
		 */
		nbno = bno;
		nlen = len + gtlen;
		if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
			goto error0;
	}
	/*
	 * No contiguous neighbors.
	 * Insert the new freespace into the by-block tree.
	 */
	else {
		nbno = bno;
		nlen = len;
		if ((error = xfs_btree_insert(bno_cur, &i)))
			goto error0;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			error = -EFSCORRUPTED;
			goto error0;
		}
	}
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	bno_cur = NULL;
	/*
	 * In all cases we need to insert the new freespace in the by-size tree.
	 * The lookup must NOT find an existing record (i == 0): the merged
	 * extent cannot already be present in the by-size tree.
	 */
	if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
		goto error0;
	if (XFS_IS_CORRUPT(mp, i != 0)) {
		xfs_btree_mark_sick(cnt_cur);
		error = -EFSCORRUPTED;
		goto error0;
	}
	if ((error = xfs_btree_insert(cnt_cur, &i)))
		goto error0;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cnt_cur);
		error = -EFSCORRUPTED;
		goto error0;
	}
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
	cnt_cur = NULL;

	/*
	 * Update the freespace totals in the ag and superblock.  The per-AG
	 * reservation accounting is adjusted before the error check so it
	 * stays in step with the length we attempted to free.
	 */
	error = xfs_alloc_update_counters(tp, agbp, len);
	xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
	if (error)
		goto error0;

	XFS_STATS_INC(mp, xs_freex);
	XFS_STATS_ADD(mp, xs_freeb, len);

	trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);

	return 0;

 error0:
	/* -1/-1 in the trace marks the failure path; release any live cursors. */
	trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
	if (bno_cur)
		xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	if (cnt_cur)
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
	return error;
}
2273 :
2274 : /*
2275 : * Visible (exported) allocation/free functions.
2276 : * Some of these are used just by xfs_alloc_btree.c and this file.
2277 : */
2278 :
2279 : /*
2280 : * Compute and fill in value of m_alloc_maxlevels.
2281 : */
2282 : void
2283 24119 : xfs_alloc_compute_maxlevels(
2284 : xfs_mount_t *mp) /* file system mount structure */
2285 : {
2286 48238 : mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
2287 24119 : (mp->m_sb.sb_agblocks + 1) / 2);
2288 24119 : ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
2289 24119 : }
2290 :
2291 : /*
2292 : * Find the length of the longest extent in an AG. The 'need' parameter
2293 : * specifies how much space we're going to need for the AGFL and the
2294 : * 'reserved' parameter tells us how many blocks in this AG are reserved for
2295 : * other callers.
2296 : */
2297 : xfs_extlen_t
2298 209380583 : xfs_alloc_longest_free_extent(
2299 : struct xfs_perag *pag,
2300 : xfs_extlen_t need,
2301 : xfs_extlen_t reserved)
2302 : {
2303 209380583 : xfs_extlen_t delta = 0;
2304 :
2305 : /*
2306 : * If the AGFL needs a recharge, we'll have to subtract that from the
2307 : * longest extent.
2308 : */
2309 209380583 : if (need > pag->pagf_flcount)
2310 349023 : delta = need - pag->pagf_flcount;
2311 :
2312 : /*
2313 : * If we cannot maintain others' reservations with space from the
2314 : * not-longest freesp extents, we'll have to subtract /that/ from
2315 : * the longest extent too.
2316 : */
2317 209380583 : if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2318 157269874 : delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2319 :
2320 : /*
2321 : * If the longest extent is long enough to satisfy all the
2322 : * reservations and AGFL rules in place, we can return this extent.
2323 : */
2324 209380583 : if (pag->pagf_longest > delta)
2325 209348980 : return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2326 : pag->pagf_longest - delta);
2327 :
2328 : /* Otherwise, let the caller try for 1 block if there's space. */
2329 31603 : return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2330 : }
2331 :
2332 : /*
2333 : * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
2334 : * return the largest possible minimum length.
2335 : */
2336 : unsigned int
2337 740937690 : xfs_alloc_min_freelist(
2338 : struct xfs_mount *mp,
2339 : struct xfs_perag *pag)
2340 : {
2341 : /* AG btrees have at least 1 level. */
2342 740937690 : static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
2343 740937690 : const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
2344 740937690 : unsigned int min_free;
2345 :
2346 740937690 : ASSERT(mp->m_alloc_maxlevels > 0);
2347 :
2348 : /* space needed by-bno freespace btree */
2349 740937690 : min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
2350 : mp->m_alloc_maxlevels);
2351 : /* space needed by-size freespace btree */
2352 740937690 : min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
2353 : mp->m_alloc_maxlevels);
2354 : /* space needed reverse mapping used space btree */
2355 740937690 : if (xfs_has_rmapbt(mp))
2356 675853284 : min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2357 : mp->m_rmap_maxlevels);
2358 :
2359 740937690 : return min_free;
2360 : }
2361 :
/*
 * Check if the operation we are fixing up the freelist for should go ahead or
 * not. If we are freeing blocks, we always allow it, otherwise the allocation
 * is dependent on whether the size and shape of free space available will
 * permit the requested allocation to take place.
 *
 * Note: may shrink args->maxlen as a side effect (see below) unless
 * XFS_ALLOC_FLAG_CHECK is set.
 */
static bool
xfs_alloc_space_available(
	struct xfs_alloc_arg	*args,
	xfs_extlen_t		min_free,
	int			flags)
{
	struct xfs_perag	*pag = args->pag;
	xfs_extlen_t		alloc_len, longest;
	xfs_extlen_t		reservation; /* blocks that are still reserved */
	int			available;
	xfs_extlen_t		agflcount;

	/* Freeing blocks always makes space; never reject it. */
	if (flags & XFS_ALLOC_FLAG_FREEING)
		return true;

	reservation = xfs_ag_resv_needed(pag, args->resv);

	/* do we have enough contiguous free space for the allocation? */
	/* worst case: minlen plus the slop needed to achieve alignment */
	alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
	longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
	if (longest < alloc_len)
		return false;

	/*
	 * Do we have enough free space remaining for the allocation? Don't
	 * account extra agfl blocks because we are about to defer free them,
	 * making them unavailable until the current transaction commits.
	 */
	agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
	available = (int)(pag->pagf_freeblks + agflcount -
			  reservation - min_free - args->minleft);
	if (available < (int)max(args->total, alloc_len))
		return false;

	/*
	 * Clamp maxlen to the amount of free space available for the actual
	 * extent allocation.
	 */
	if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
		args->maxlen = available;
		ASSERT(args->maxlen > 0);
		ASSERT(args->maxlen >= args->minlen);
	}

	return true;
}
2414 :
2415 : int
2416 198096 : xfs_free_agfl_block(
2417 : struct xfs_trans *tp,
2418 : xfs_agnumber_t agno,
2419 : xfs_agblock_t agbno,
2420 : struct xfs_buf *agbp,
2421 : struct xfs_owner_info *oinfo)
2422 : {
2423 198096 : int error;
2424 198096 : struct xfs_buf *bp;
2425 :
2426 198096 : error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2427 : XFS_AG_RESV_AGFL);
2428 198097 : if (error)
2429 : return error;
2430 :
2431 198093 : error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2432 198093 : XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2433 : tp->t_mountp->m_bsize, 0, &bp);
2434 198093 : if (error)
2435 : return error;
2436 198093 : xfs_trans_binval(tp, bp);
2437 :
2438 198093 : return 0;
2439 : }
2440 :
2441 : /*
2442 : * Check the agfl fields of the agf for inconsistency or corruption.
2443 : *
2444 : * The original purpose was to detect an agfl header padding mismatch between
2445 : * current and early v5 kernels. This problem manifests as a 1-slot size
2446 : * difference between the on-disk flcount and the active [first, last] range of
2447 : * a wrapped agfl.
2448 : *
2449 : * However, we need to use these same checks to catch agfl count corruptions
2450 : * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
2451 : * way, we need to reset the agfl and warn the user.
2452 : *
2453 : * Return true if a reset is required before the agfl can be used, false
2454 : * otherwise.
2455 : */
2456 : static bool
2457 173139 : xfs_agfl_needs_reset(
2458 : struct xfs_mount *mp,
2459 : struct xfs_agf *agf)
2460 : {
2461 173139 : uint32_t f = be32_to_cpu(agf->agf_flfirst);
2462 173139 : uint32_t l = be32_to_cpu(agf->agf_fllast);
2463 173139 : uint32_t c = be32_to_cpu(agf->agf_flcount);
2464 173139 : int agfl_size = xfs_agfl_size(mp);
2465 173139 : int active;
2466 :
2467 : /*
2468 : * The agf read verifier catches severe corruption of these fields.
2469 : * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2470 : * the verifier allows it.
2471 : */
2472 173139 : if (f >= agfl_size || l >= agfl_size)
2473 : return true;
2474 173140 : if (c > agfl_size)
2475 : return true;
2476 :
2477 : /*
2478 : * Check consistency between the on-disk count and the active range. An
2479 : * agfl padding mismatch manifests as an inconsistent flcount.
2480 : */
2481 173140 : if (c && l >= f)
2482 162676 : active = l - f + 1;
2483 10464 : else if (c)
2484 234 : active = agfl_size - f + l + 1;
2485 : else
2486 : active = 0;
2487 :
2488 173140 : return active != c;
2489 : }
2490 :
/*
 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
 * agfl content cannot be trusted. Warn the user that a repair is required to
 * recover leaked blocks.
 *
 * The purpose of this mechanism is to handle filesystems affected by the agfl
 * header padding mismatch problem. A reset keeps the filesystem online with a
 * relatively minor free space accounting inconsistency rather than suffer the
 * inevitable crash from use of an invalid agfl block.
 */
static void
xfs_agfl_reset(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = agbp->b_addr;

	/* Only called after xfs_agfl_needs_reset() flagged the perag. */
	ASSERT(xfs_perag_agfl_needs_reset(pag));
	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);

	/* The blocks in the dropped active range are leaked, not freed. */
	xfs_warn(mp,
	       "WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
	       "Please unmount and run xfs_repair.",
	         pag->pag_agno, pag->pagf_flcount);

	/* Reinitialize the on-disk pointers to describe an empty list. */
	agf->agf_flfirst = 0;
	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
	agf->agf_flcount = 0;
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
				    XFS_AGF_FLCOUNT);

	/* Bring the in-core perag state back in sync with the AGF. */
	pag->pagf_flcount = 0;
	clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
}
2527 :
2528 : /*
2529 : * Defer an AGFL block free. This is effectively equivalent to
2530 : * xfs_free_extent_later() with some special handling particular to AGFL blocks.
2531 : *
2532 : * Deferring AGFL frees helps prevent log reservation overruns due to too many
2533 : * allocation operations in a transaction. AGFL frees are prone to this problem
2534 : * because for one they are always freed one at a time. Further, an immediate
2535 : * AGFL block free can cause a btree join and require another block free before
2536 : * the real allocation can proceed. Deferring the free disconnects freeing up
2537 : * the AGFL slot from freeing the block.
2538 : */
2539 : static int
2540 198100 : xfs_defer_agfl_block(
2541 : struct xfs_trans *tp,
2542 : xfs_agnumber_t agno,
2543 : xfs_agblock_t agbno,
2544 : struct xfs_owner_info *oinfo)
2545 : {
2546 198100 : struct xfs_mount *mp = tp->t_mountp;
2547 198100 : struct xfs_extent_free_item *xefi;
2548 198100 : xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
2549 :
2550 198100 : ASSERT(xfs_extfree_item_cache != NULL);
2551 198100 : ASSERT(oinfo != NULL);
2552 :
2553 198100 : if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
2554 0 : return -EFSCORRUPTED;
2555 :
2556 198100 : xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
2557 : GFP_KERNEL | __GFP_NOFAIL);
2558 198100 : xefi->xefi_startblock = fsbno;
2559 198100 : xefi->xefi_blockcount = 1;
2560 198100 : xefi->xefi_owner = oinfo->oi_owner;
2561 198100 : xefi->xefi_agresv = XFS_AG_RESV_AGFL;
2562 :
2563 198100 : trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2564 :
2565 198100 : xfs_extent_free_get_group(mp, xefi);
2566 198100 : xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
2567 198100 : return 0;
2568 : }
2569 :
/*
 * Add the extent to the list of extents to be freed at transaction end.
 * The list is maintained sorted (by block number).
 *
 * @tp:           transaction the deferred free is attached to
 * @bno:          first block of the extent, as a filesystem block number
 * @len:          length of the extent in blocks
 * @oinfo:        rmap owner of the extent, or NULL for an unowned free
 * @type:         per-AG reservation the blocks return to (never AGFL here)
 * @skip_discard: if true, don't issue an online discard for these blocks
 *
 * Returns 0, or -EFSCORRUPTED if the extent fails verification.
 */
int
__xfs_free_extent_later(
	struct xfs_trans		*tp,
	xfs_fsblock_t			bno,
	xfs_filblks_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type,
	bool				skip_discard)
{
	struct xfs_extent_free_item	*xefi;
	struct xfs_mount		*mp = tp->t_mountp;
#ifdef DEBUG
	xfs_agnumber_t			agno;
	xfs_agblock_t			agbno;

	/* Debug-only sanity: the extent must sit entirely inside one AG. */
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_extfree_item_cache != NULL);
	/* AGFL blocks go through xfs_defer_agfl_block() instead. */
	ASSERT(type != XFS_AG_RESV_AGFL);

	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
		return -EFSCORRUPTED;

	/* __GFP_NOFAIL: this allocation never returns NULL. */
	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
			GFP_KERNEL | __GFP_NOFAIL);
	xefi->xefi_startblock = bno;
	xefi->xefi_blockcount = (xfs_extlen_t)len;
	xefi->xefi_agresv = type;
	if (skip_discard)
		xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
	if (oinfo) {
		ASSERT(oinfo->oi_offset == 0);

		/* Carry the fork/bmbt ownership hints into the intent item. */
		if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
			xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
		if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
			xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
		xefi->xefi_owner = oinfo->oi_owner;
	} else {
		xefi->xefi_owner = XFS_RMAP_OWN_NULL;
	}
	trace_xfs_bmap_free_defer(mp,
			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);

	xfs_extent_free_get_group(mp, xefi);
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
	return 0;
}
2632 :
2633 : #ifdef DEBUG
2634 : /*
2635 : * Check if an AGF has a free extent record whose length is equal to
2636 : * args->minlen.
2637 : */
2638 : STATIC int
2639 373585 : xfs_exact_minlen_extent_available(
2640 : struct xfs_alloc_arg *args,
2641 : struct xfs_buf *agbp,
2642 : int *stat)
2643 : {
2644 373585 : struct xfs_btree_cur *cnt_cur;
2645 373585 : xfs_agblock_t fbno;
2646 373585 : xfs_extlen_t flen;
2647 373585 : int error = 0;
2648 :
2649 373585 : cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
2650 : args->pag, XFS_BTNUM_CNT);
2651 373586 : error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
2652 373552 : if (error)
2653 0 : goto out;
2654 :
2655 373552 : if (*stat == 0) {
2656 0 : xfs_btree_mark_sick(cnt_cur);
2657 0 : error = -EFSCORRUPTED;
2658 0 : goto out;
2659 : }
2660 :
2661 373552 : error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
2662 373574 : if (error)
2663 0 : goto out;
2664 :
2665 373574 : if (*stat == 1 && flen != args->minlen)
2666 3456 : *stat = 0;
2667 :
2668 370118 : out:
2669 373574 : xfs_btree_del_cursor(cnt_cur, error);
2670 :
2671 373587 : return error;
2672 : }
2673 : #endif
2674 :
2675 : /*
2676 : * Decide whether to use this allocation group for this allocation.
2677 : * If so, fix up the btree freelist's size.
2678 : */
int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	uint32_t		alloc_flags)
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* deferred ops (AGFL block frees) require permanent transactions */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	/*
	 * Read the AGF first if the perag hasn't been initialised from it
	 * yet; xfs_alloc_read_agf() fills in the in-core perag counters
	 * that the space checks below rely on.
	 */
	if (!xfs_perag_initialised_agf(pag)) {
		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data then try
	 * somewhere else if we are not being asked to try harder at this
	 * point
	 */
	if (xfs_perag_prefers_metadata(pag) &&
	    (args->datatype & XFS_ALLOC_USERDATA) &&
	    (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	/*
	 * First space check against the in-core perag counters.
	 * NOTE(review): XFS_ALLOC_FLAG_CHECK appears to request a
	 * check-only pass here — confirm in xfs_alloc_space_available().
	 */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, alloc_flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (xfs_perag_agfl_needs_reset(pag))
		xfs_agfl_reset(tp, agbp, pag);

	/* If there isn't enough total space or single-extent, reject it. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, alloc_flags))
		goto out_agbp_relse;

#ifdef DEBUG
	/*
	 * Debug-only: verify an extent of exactly args->minlen is present
	 * before we start modifying the freelist.
	 */
	if (args->alloc_minlen_only) {
		int stat;

		error = xfs_exact_minlen_extent_available(args, agbp, &stat);
		if (error || !stat)
			goto out_agbp_relse;
	}
#endif
	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 *
	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
	 * updating the rmapbt. Both flags are used in xfs_repair while we're
	 * rebuilding the rmapbt, and neither are used by the kernel. They're
	 * both required to ensure that rmaps are correctly recorded for the
	 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
	 * repair/rmap.c in xfsprogs for details.
	 */
	memset(&targs, 0, sizeof(targs));
	/* struct copy below */
	if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
	else
		targs.oinfo = XFS_RMAP_OINFO_AG;
	while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
	       pag->pagf_flcount > need) {
		error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;

		/* defer agfl frees */
		error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
		if (error)
			goto out_agbp_relse;
	}

	/* Set up a local allocation request used to refill the AGFL. */
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out. Won't happen if callers are obeying
		 * the restrictions correctly. Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}

		/* Record the new AG-owned extent in the rmapbt unless told not to. */
		if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
			error = xfs_rmap_alloc(tp, agbp, pag,
				       targs.agbno, targs.len, &targs.oinfo);
			if (error)
				goto out_agflbp_relse;
		}
		error = xfs_alloc_update_counters(tp, agbp,
						  -((long)(targs.len)));
		if (error)
			goto out_agflbp_relse;

		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(pag, tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
2864 :
2865 : /*
2866 : * Get a block from the freelist.
2867 : * Returns with the buffer for the block gotten.
2868 : */
int
xfs_alloc_get_freelist(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	xfs_agblock_t		*bnop,
	int			btreeblk)
{
	struct xfs_agf		*agf = agbp->b_addr;
	struct xfs_buf		*agflbp;
	xfs_agblock_t		bno;
	__be32			*agfl_bno;
	int			error;
	uint32_t		logflags;
	struct xfs_mount	*mp = tp->t_mountp;

	/*
	 * Freelist is empty, give up.  Callers must check *bnop for
	 * NULLAGBLOCK; this is not an error.
	 */
	if (!agf->agf_flcount) {
		*bnop = NULLAGBLOCK;
		return 0;
	}
	/*
	 * Read the array of free blocks.
	 */
	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
	if (error)
		return error;


	/*
	 * Get the block number and update the data structures.
	 */
	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
	/*
	 * NOTE(review): agflbp is not released on this corruption path;
	 * presumably transaction cancellation cleans it up — confirm.
	 */
	if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
		return -EFSCORRUPTED;

	/* The AGFL is a circular array: wrap flfirst back to slot zero. */
	be32_add_cpu(&agf->agf_flfirst, 1);
	xfs_trans_brelse(tp, agflbp);
	if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
		agf->agf_flfirst = 0;

	ASSERT(!xfs_perag_agfl_needs_reset(pag));
	be32_add_cpu(&agf->agf_flcount, -1);
	pag->pagf_flcount--;

	logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Block is going to a btree; account it in agf_btreeblks. */
		be32_add_cpu(&agf->agf_btreeblks, 1);
		pag->pagf_btreeblks++;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	xfs_alloc_log_agf(tp, agbp, logflags);
	*bnop = bno;

	return 0;
}
2929 :
2930 : /*
2931 : * Log the given fields from the agf structure.
2932 : */
void
xfs_alloc_log_agf(
	struct xfs_trans	*tp,
	struct xfs_buf		*bp,
	uint32_t		fields)
{
	int			first;	/* first byte offset */
	int			last;	/* last byte offset */
	/*
	 * Byte offsets of the AGF fields, indexed by XFS_AGF_* flag bit
	 * position; consumed by xfs_btree_offsets() below to turn the
	 * fields bitmask into a [first, last] byte range to log.  Must
	 * stay in sync with XFS_AGF_NUM_BITS and struct xfs_agf layout.
	 */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		offsetof(xfs_agf_t, agf_rmap_blocks),
		offsetof(xfs_agf_t, agf_refcount_blocks),
		offsetof(xfs_agf_t, agf_refcount_root),
		offsetof(xfs_agf_t, agf_refcount_level),
		/* needed so that we don't log the whole rest of the structure: */
		offsetof(xfs_agf_t, agf_spare64),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);

	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
2971 :
2972 : /*
2973 : * Put the block on the freelist for the allocation group.
2974 : */
int
xfs_alloc_put_freelist(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_buf		*agflbp,	/* may be NULL; read on demand */
	xfs_agblock_t		bno,
	int			btreeblk)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = agbp->b_addr;
	__be32			*blockp;
	int			error;
	uint32_t		logflags;
	__be32			*agfl_bno;
	int			startoff;

	/* Caller may pass the AGFL buffer; otherwise read it here. */
	if (!agflbp) {
		error = xfs_alloc_read_agfl(pag, tp, &agflbp);
		if (error)
			return error;
	}

	/* The AGFL is a circular array: wrap fllast back to slot zero. */
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
		agf->agf_fllast = 0;

	ASSERT(!xfs_perag_agfl_needs_reset(pag));
	be32_add_cpu(&agf->agf_flcount, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Block came back from a btree; undo agf_btreeblks accounting. */
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));

	/* Store the freed block in the slot fllast now points at. */
	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	startoff = (char *)blockp - (char *)agflbp->b_addr;

	/*
	 * NOTE(review): the AGF is logged a second time here with the same
	 * logflags as above; this looks redundant — confirm against history
	 * before removing.
	 */
	xfs_alloc_log_agf(tp, agbp, logflags);

	/* Log only the single modified AGFL slot. */
	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
	xfs_trans_log_buf(tp, agflbp, startoff,
			  startoff + sizeof(xfs_agblock_t) - 1);
	return 0;
}
3029 :
3030 : /*
3031 : * Check that this AGF/AGI header's sequence number and length matches the AG
3032 : * number and size in fsblocks.
3033 : */
3034 : xfs_failaddr_t
3035 2597994 : xfs_validate_ag_length(
3036 : struct xfs_buf *bp,
3037 : uint32_t seqno,
3038 : uint32_t length)
3039 : {
3040 2597994 : struct xfs_mount *mp = bp->b_mount;
3041 : /*
3042 : * During growfs operations, the perag is not fully initialised,
3043 : * so we can't use it for any useful checking. growfs ensures we can't
3044 : * use it by using uncached buffers that don't have the perag attached
3045 : * so we can detect and avoid this problem.
3046 : */
3047 2597994 : if (bp->b_pag && seqno != bp->b_pag->pag_agno)
3048 0 : return __this_address;
3049 :
3050 : /*
3051 : * Only the last AG in the filesystem is allowed to be shorter
3052 : * than the AG size recorded in the superblock.
3053 : */
3054 2597994 : if (length != mp->m_sb.sb_agblocks) {
3055 : /*
3056 : * During growfs, the new last AG can get here before we
3057 : * have updated the superblock. Give it a pass on the seqno
3058 : * check.
3059 : */
3060 1093 : if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
3061 0 : return __this_address;
3062 1093 : if (length < XFS_MIN_AG_BLOCKS)
3063 0 : return __this_address;
3064 1093 : if (length > mp->m_sb.sb_agblocks)
3065 2 : return __this_address;
3066 : }
3067 :
3068 : return NULL;
3069 : }
3070 :
3071 : /*
3072 : * Verify the AGF is consistent.
3073 : *
3074 : * We do not verify the AGFL indexes in the AGF are fully consistent here
3075 : * because of issues with variable on-disk structure sizes. Instead, we check
3076 : * the agfl indexes for consistency when we initialise the perag from the AGF
3077 : * information after a read completes.
3078 : *
3079 : * If the index is inconsistent, then we mark the perag as needing an AGFL
3080 : * reset. The first AGFL update performed then resets the AGFL indexes and
3081 : * refills the AGFL with known good free blocks, allowing the filesystem to
3082 : * continue operating normally at the cost of a few leaked free space blocks.
3083 : */
static xfs_failaddr_t
xfs_agf_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_agf		*agf = bp->b_addr;
	xfs_failaddr_t		fa;
	uint32_t		agf_seqno = be32_to_cpu(agf->agf_seqno);
	uint32_t		agf_length = be32_to_cpu(agf->agf_length);

	/* CRC filesystems also carry a UUID and an LSN to validate. */
	if (xfs_has_crc(mp)) {
		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
			return __this_address;
	}

	if (!xfs_verify_magic(bp, agf->agf_magicnum))
		return __this_address;

	if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
		return __this_address;

	/*
	 * Both agf_seqno and agf_length need to be validated before anything
	 * else block number related in the AGF or AGFL can be checked.
	 */
	fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
	if (fa)
		return fa;

	/* AGFL indexes must fit inside the AGFL; count may equal its size. */
	if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
		return __this_address;
	if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
		return __this_address;
	if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
		return __this_address;

	/* The longest free extent can't exceed the total free space. */
	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
	    be32_to_cpu(agf->agf_freeblks) > agf_length)
		return __this_address;

	/* bnobt/cntbt levels must be sane: at least 1, at most maxlevels. */
	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
						mp->m_alloc_maxlevels ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
						mp->m_alloc_maxlevels)
		return __this_address;

	if (xfs_has_lazysbcount(mp) &&
	    be32_to_cpu(agf->agf_btreeblks) > agf_length)
		return __this_address;

	/* rmapbt fields are only meaningful when the feature is enabled. */
	if (xfs_has_rmapbt(mp)) {
		if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
			return __this_address;

		if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
		    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
						mp->m_rmap_maxlevels)
			return __this_address;
	}

	/* refcountbt fields likewise require the reflink feature. */
	if (xfs_has_reflink(mp)) {
		if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
			return __this_address;

		if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
		    be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
			return __this_address;
	}

	return NULL;
}
3159 :
3160 : static void
3161 218910 : xfs_agf_read_verify(
3162 : struct xfs_buf *bp)
3163 : {
3164 218910 : struct xfs_mount *mp = bp->b_mount;
3165 218910 : xfs_failaddr_t fa;
3166 :
3167 437802 : if (xfs_has_crc(mp) &&
3168 : !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
3169 8 : xfs_verifier_error(bp, -EFSBADCRC, __this_address);
3170 : else {
3171 218902 : fa = xfs_agf_verify(bp);
3172 218902 : if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
3173 12 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3174 : }
3175 218910 : }
3176 :
static void
xfs_agf_write_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	struct xfs_agf		*agf = bp->b_addr;
	xfs_failaddr_t		fa;

	/* Never write out a structurally corrupt AGF. */
	fa = xfs_agf_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	/* Non-CRC filesystems carry no LSN or checksum in the AGF. */
	if (!xfs_has_crc(mp))
		return;

	/* Stamp the LSN from the attached log item before checksumming. */
	if (bip)
		agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	/* CRC must be computed last, after all field updates above. */
	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
}
3200 :
/* Buffer verifier operations wired up for all AGF buffer reads and writes. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
3208 :
3209 : /*
3210 : * Read in the allocation group header (free/alloc section).
3211 : */
int
xfs_read_agf(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	int			flags,	/* XBF_* buffer flags, e.g. XBF_TRYLOCK */
	struct xfs_buf		**agfbpp)
{
	struct xfs_mount	*mp = pag->pag_mount;
	int			error;

	trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);

	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
	/* Corruption-class read failures mark the AGF sick for scrub. */
	if (xfs_metadata_is_sick(error))
		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
	if (error)
		return error;

	/* Raise the buffer's LRU reference count; AGFs are hot metadata. */
	xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
	return 0;
}
3235 :
3236 : /*
3237 : * Read in the allocation group header (free/alloc section) and initialise the
3238 : * perag structure if necessary. If the caller provides @agfbpp, then return the
3239 : * locked buffer to the caller, otherwise free it.
3240 : */
int
xfs_alloc_read_agf(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	int			flags,	/* XFS_ALLOC_FLAG_* */
	struct xfs_buf		**agfbpp)	/* NULL: release buffer on return */
{
	struct xfs_buf		*agfbp;
	struct xfs_agf		*agf;
	int			error;
	int			allocbt_blks;

	trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);

	/* We don't support trylock when freeing. */
	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
	error = xfs_read_agf(pag, tp,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			&agfbp);
	if (error)
		return error;

	agf = agfbp->b_addr;
	/*
	 * First read of this AGF: seed the in-core perag counters and
	 * btree levels from the on-disk structure.
	 */
	if (!xfs_perag_initialised_agf(pag)) {
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		/* Flag a padding-mismatched AGFL for lazy reset on first use. */
		if (xfs_agfl_needs_reset(pag->pag_mount, agf))
			set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
		else
			clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);

		/*
		 * Update the in-core allocbt counter. Filter out the rmapbt
		 * subset of the btreeblks counter because the rmapbt is managed
		 * by perag reservation. Subtract one for the rmapbt root block
		 * because the rmap counter includes it while the btreeblks
		 * counter only tracks non-root blocks.
		 */
		allocbt_blks = pag->pagf_btreeblks;
		if (xfs_has_rmapbt(pag->pag_mount))
			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
		if (allocbt_blks > 0)
			atomic64_add(allocbt_blks,
					&pag->pag_mount->m_allocbt_blks);

		set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
	}
#ifdef DEBUG
	/* Debug builds re-check the in-core counters against the disk AGF. */
	else if (!xfs_is_shutdown(pag->pag_mount)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	if (agfbpp)
		*agfbpp = agfbp;
	else
		xfs_trans_brelse(tp, agfbp);
	return 0;
}
3316 :
3317 : /*
 3318            :  * Pre-process allocation arguments to set initial state that we don't require
3319 : * callers to set up correctly, as well as bounds check the allocation args
3320 : * that are set up.
3321 : */
static int
xfs_alloc_vextent_check_args(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target,
	xfs_agnumber_t		*minimum_agno)	/* out: lowest AG we may lock */
{
	struct xfs_mount	*mp = args->mp;
	xfs_agblock_t		agsize;

	args->fsbno = NULLFSBLOCK;

	/*
	 * If this transaction already holds an AGF, never allocate from a
	 * lower-numbered AG — that would invert AGF lock order.
	 */
	*minimum_agno = 0;
	if (args->tp->t_highest_agno != NULLAGNUMBER)
		*minimum_agno = args->tp->t_highest_agno;

	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;

	/*
	 * The ASSERTs mirror the runtime checks below: debug builds trap
	 * bad callers loudly, production returns -ENOSPC.
	 */
	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->alignment > 0);
	ASSERT(args->resv != XFS_AG_RESV_AGFL);

	ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);

	if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		trace_xfs_alloc_vextent_badargs(args);
		return -ENOSPC;
	}

	/* Refuse allocations that would deadlock on AGF lock ordering. */
	if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
		trace_xfs_alloc_vextent_skip_deadlock(args);
		return -ENOSPC;
	}
	return 0;

}
3374 :
3375 : /*
3376 : * Prepare an AG for allocation. If the AG is not prepared to accept the
3377 : * allocation, return failure.
3378 : *
3379 : * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
3380 : * modified to hold their own perag references.
3381 : */
static int
xfs_alloc_vextent_prepare_ag(
	struct xfs_alloc_arg	*args,
	uint32_t		alloc_flags)
{
	bool			need_pag = !args->pag;
	int			error;

	/* Take a temporary perag reference if the caller didn't supply one. */
	if (need_pag)
		args->pag = xfs_perag_get(args->mp, args->agno);

	args->agbp = NULL;
	error = xfs_alloc_fix_freelist(args, alloc_flags);
	if (error) {
		trace_xfs_alloc_vextent_nofix(args);
		if (need_pag)
			xfs_perag_put(args->pag);
		args->agbno = NULLAGBLOCK;
		return error;
	}
	/*
	 * A NULL agbp with no error means fix_freelist rejected this AG
	 * (no space, or trylock failed); signal that via NULLAGBLOCK.
	 */
	if (!args->agbp) {
		/* cannot allocate in this AG at all */
		trace_xfs_alloc_vextent_noagbp(args);
		args->agbno = NULLAGBLOCK;
		return 0;
	}
	args->wasfromfl = 0;
	return 0;
}
3411 :
3412 : /*
3413 : * Post-process allocation results to account for the allocation if it succeed
3414 : * and set the allocated block number correctly for the caller.
3415 : *
3416 : * XXX: we should really be returning ENOSPC for ENOSPC, not
3417 : * hiding it behind a "successful" NULLFSBLOCK allocation.
3418 : */
static int
xfs_alloc_vextent_finish(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		minimum_agno,
	int			alloc_error,	/* result of the allocation attempt */
	bool			drop_perag)	/* release args->pag before return? */
{
	struct xfs_mount	*mp = args->mp;
	int			error = 0;

	/*
	 * We can end up here with a locked AGF. If we failed, the caller is
	 * likely going to try to allocate again with different parameters, and
	 * that can widen the AGs that are searched for free space. If we have
	 * to do BMBT block allocation, we have to do a new allocation.
	 *
	 * Hence leaving this function with the AGF locked opens up potential
	 * ABBA AGF deadlocks because a future allocation attempt in this
	 * transaction may attempt to lock a lower number AGF.
	 *
	 * We can't release the AGF until the transaction is committed, so at
	 * this point we must update the "first allocation" tracker to point at
	 * this AG if the tracker is empty or points to a lower AG. This allows
	 * the next allocation attempt to be modified appropriately to avoid
	 * deadlocks.
	 */
	if (args->agbp &&
	    (args->tp->t_highest_agno == NULLAGNUMBER ||
	     args->agno > minimum_agno))
		args->tp->t_highest_agno = args->agno;

	/*
	 * If the allocation failed with an error or we had an ENOSPC result,
	 * preserve the returned error whilst also marking the allocation result
	 * as "no extent allocated". This ensures that callers that fail to
	 * capture the error will still treat it as a failed allocation.
	 */
	if (alloc_error || args->agbno == NULLAGBLOCK) {
		args->fsbno = NULLFSBLOCK;
		error = alloc_error;
		goto out_drop_perag;
	}

	args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(args->agbno % args->alignment == 0);
	XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);

	/* if not file data, insert new block into the reverse map btree */
	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
		error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
				       args->agbno, args->len, &args->oinfo);
		if (error)
			goto out_drop_perag;
	}

	/* Freelist blocks are already accounted for; skip them here. */
	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->agbp,
						  -((long)(args->len)));
		if (error)
			goto out_drop_perag;

		ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
					       args->len));
	}

	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);

	XFS_STATS_INC(mp, xs_allocx);
	XFS_STATS_ADD(mp, xs_allocb, args->len);

	trace_xfs_alloc_vextent_finish(args);

out_drop_perag:
	if (drop_perag && args->pag) {
		xfs_perag_rele(args->pag);
		args->pag = NULL;
	}
	return error;
}
3501 :
3502 : /*
3503 : * Allocate within a single AG only. This uses a best-fit length algorithm so if
3504 : * you need an exact sized allocation without locality constraints, this is the
3505 : * fastest way to do it.
3506 : *
3507 : * Caller is expected to hold a perag reference in args->pag.
3508 : */
int
xfs_alloc_vextent_this_ag(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		agno)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		minimum_agno;
	uint32_t		alloc_flags = 0;
	int			error;

	ASSERT(args->pag != NULL);
	ASSERT(args->pag->pag_agno == agno);

	args->agno = agno;
	args->agbno = 0;

	trace_xfs_alloc_vextent_this_ag(args);

	error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
			&minimum_agno);
	if (error) {
		/* -ENOSPC is reported via a NULLFSBLOCK result, not an error. */
		if (error == -ENOSPC)
			return 0;
		return error;
	}

	error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
	if (!error && args->agbp)
		error = xfs_alloc_ag_vextent_size(args, alloc_flags);

	/* finish() accounts the extent and handles the failure cases too. */
	return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
}
3541 :
3542 : /*
3543 : * Iterate all AGs trying to allocate an extent starting from @start_ag.
3544 : *
3545 : * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
3546 : * allocation attempts in @start_agno have locality information. If we fail to
3547 : * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
3548 : * we attempt to allocation in as there is no locality optimisation possible for
 3549            :  * we attempt to allocate in, as there is no locality optimisation possible for
3550 : *
3551 : * On return, args->pag may be left referenced if we finish before the "all
3552 : * failed" return point. The allocation finish still needs the perag, and
3553 : * so the caller will release it once they've finished the allocation.
3554 : *
3555 : * When we wrap the AG iteration at the end of the filesystem, we have to be
3556 : * careful not to wrap into AGs below ones we already have locked in the
3557 : * transaction if we are doing a blocking iteration. This will result in an
3558 : * out-of-order locking of AGFs and hence can cause deadlocks.
3559 : */
static int
xfs_alloc_vextent_iterate_ags(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		minimum_agno,	/* lowest AG we may lock */
	xfs_agnumber_t		start_agno,	/* preferred starting AG */
	xfs_agblock_t		target_agbno,	/* locality hint within start AG */
	uint32_t		alloc_flags)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		restart_agno = minimum_agno;
	xfs_agnumber_t		agno;
	int			error = 0;

	/* The non-blocking pass may wrap all the way back to AG 0. */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
		restart_agno = 0;
restart:
	for_each_perag_wrap_range(mp, start_agno, restart_agno,
			mp->m_sb.sb_agcount, agno, args->pag) {
		args->agno = agno;
		error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
		if (error)
			break;
		if (!args->agbp) {
			/* This AG rejected us; move on to the next one. */
			trace_xfs_alloc_vextent_loopfailed(args);
			continue;
		}

		/*
		 * Allocation is supposed to succeed now, so break out of the
		 * loop regardless of whether we succeed or not.
		 */
		if (args->agno == start_agno && target_agbno) {
			/* Only the starting AG has useful locality info. */
			args->agbno = target_agbno;
			error = xfs_alloc_ag_vextent_near(args, alloc_flags);
		} else {
			args->agbno = 0;
			error = xfs_alloc_ag_vextent_size(args, alloc_flags);
		}
		break;
	}
	if (error) {
		xfs_perag_rele(args->pag);
		args->pag = NULL;
		return error;
	}
	/* Success: the perag reference is handed back to the caller. */
	if (args->agbp)
		return 0;

	/*
	 * We didn't find an AG we can allocate from. If we were given
	 * constraining flags by the caller, drop them and retry the allocation
	 * without any constraints being set.
	 */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
		alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
		/* Blocking pass must not wrap below minimum_agno. */
		restart_agno = minimum_agno;
		goto restart;
	}

	ASSERT(args->pag == NULL);
	trace_xfs_alloc_vextent_allfailed(args);
	return 0;
}
3623 :
3624 : /*
 * Iterate the AGs from the start AG to the end of the filesystem, trying
3626 : * to allocate blocks. It starts with a near allocation attempt in the initial
3627 : * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
3628 : * back to zero if allowed by previous allocations in this transaction,
3629 : * otherwise will wrap back to the start AG and run a second blocking pass to
3630 : * the end of the filesystem.
3631 : */
3632 : int
3633 56854289 : xfs_alloc_vextent_start_ag(
3634 : struct xfs_alloc_arg *args,
3635 : xfs_fsblock_t target)
3636 : {
3637 56854289 : struct xfs_mount *mp = args->mp;
3638 56854289 : xfs_agnumber_t minimum_agno;
3639 56854289 : xfs_agnumber_t start_agno;
3640 56854289 : xfs_agnumber_t rotorstep = xfs_rotorstep;
3641 56854289 : bool bump_rotor = false;
3642 56854289 : uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3643 56854289 : int error;
3644 :
3645 56854289 : ASSERT(args->pag == NULL);
3646 :
3647 56854289 : args->agno = NULLAGNUMBER;
3648 56854289 : args->agbno = NULLAGBLOCK;
3649 :
3650 56854289 : trace_xfs_alloc_vextent_start_ag(args);
3651 :
3652 56853443 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3653 56852217 : if (error) {
3654 0 : if (error == -ENOSPC)
3655 : return 0;
3656 0 : return error;
3657 : }
3658 :
3659 59320227 : if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
3660 : xfs_is_inode32(mp)) {
3661 0 : target = XFS_AGB_TO_FSB(mp,
3662 : ((mp->m_agfrotor / rotorstep) %
3663 : mp->m_sb.sb_agcount), 0);
3664 0 : bump_rotor = 1;
3665 : }
3666 :
3667 56852217 : start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3668 56852217 : error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
3669 : XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3670 :
3671 56856628 : if (bump_rotor) {
3672 0 : if (args->agno == start_agno)
3673 0 : mp->m_agfrotor = (mp->m_agfrotor + 1) %
3674 0 : (mp->m_sb.sb_agcount * rotorstep);
3675 : else
3676 0 : mp->m_agfrotor = (args->agno * rotorstep + 1) %
3677 0 : (mp->m_sb.sb_agcount * rotorstep);
3678 : }
3679 :
3680 56856628 : return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3681 : }
3682 :
3683 : /*
3684 : * Iterate from the agno indicated via @target through to the end of the
3685 : * filesystem attempting blocking allocation. This does not wrap or try a second
3686 : * pass, so will not recurse into AGs lower than indicated by the target.
3687 : */
3688 : int
3689 370156 : xfs_alloc_vextent_first_ag(
3690 : struct xfs_alloc_arg *args,
3691 : xfs_fsblock_t target)
3692 : {
3693 370156 : struct xfs_mount *mp = args->mp;
3694 370156 : xfs_agnumber_t minimum_agno;
3695 370156 : xfs_agnumber_t start_agno;
3696 370156 : uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3697 370156 : int error;
3698 :
3699 370156 : ASSERT(args->pag == NULL);
3700 :
3701 370156 : args->agno = NULLAGNUMBER;
3702 370156 : args->agbno = NULLAGBLOCK;
3703 :
3704 370156 : trace_xfs_alloc_vextent_first_ag(args);
3705 :
3706 370151 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3707 370139 : if (error) {
3708 0 : if (error == -ENOSPC)
3709 : return 0;
3710 0 : return error;
3711 : }
3712 :
3713 370139 : start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3714 370139 : error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
3715 : XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3716 370163 : return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3717 : }
3718 :
3719 : /*
3720 : * Allocate at the exact block target or fail. Caller is expected to hold a
3721 : * perag reference in args->pag.
3722 : */
3723 : int
3724 403398 : xfs_alloc_vextent_exact_bno(
3725 : struct xfs_alloc_arg *args,
3726 : xfs_fsblock_t target)
3727 : {
3728 403398 : struct xfs_mount *mp = args->mp;
3729 403398 : xfs_agnumber_t minimum_agno;
3730 403398 : int error;
3731 :
3732 403398 : ASSERT(args->pag != NULL);
3733 403398 : ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
3734 :
3735 403398 : args->agno = XFS_FSB_TO_AGNO(mp, target);
3736 403398 : args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3737 :
3738 403398 : trace_xfs_alloc_vextent_exact_bno(args);
3739 :
3740 403398 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3741 403396 : if (error) {
3742 0 : if (error == -ENOSPC)
3743 : return 0;
3744 0 : return error;
3745 : }
3746 :
3747 403396 : error = xfs_alloc_vextent_prepare_ag(args, 0);
3748 403399 : if (!error && args->agbp)
3749 274562 : error = xfs_alloc_ag_vextent_exact(args);
3750 :
3751 403395 : return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
3752 : }
3753 :
3754 : /*
3755 : * Allocate an extent as close to the target as possible. If there are not
3756 : * viable candidates in the AG, then fail the allocation.
3757 : *
3758 : * Caller may or may not have a per-ag reference in args->pag.
3759 : */
3760 : int
3761 1078601 : xfs_alloc_vextent_near_bno(
3762 : struct xfs_alloc_arg *args,
3763 : xfs_fsblock_t target)
3764 : {
3765 1078601 : struct xfs_mount *mp = args->mp;
3766 1078601 : xfs_agnumber_t minimum_agno;
3767 1078601 : bool needs_perag = args->pag == NULL;
3768 1078601 : uint32_t alloc_flags = 0;
3769 1078601 : int error;
3770 :
3771 1078601 : if (!needs_perag)
3772 992656 : ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
3773 :
3774 1078601 : args->agno = XFS_FSB_TO_AGNO(mp, target);
3775 1078601 : args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3776 :
3777 1078601 : trace_xfs_alloc_vextent_near_bno(args);
3778 :
3779 1078576 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3780 1078572 : if (error) {
3781 0 : if (error == -ENOSPC)
3782 : return 0;
3783 0 : return error;
3784 : }
3785 :
3786 1078572 : if (needs_perag)
3787 85918 : args->pag = xfs_perag_grab(mp, args->agno);
3788 :
3789 1078594 : error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
3790 1078613 : if (!error && args->agbp)
3791 651290 : error = xfs_alloc_ag_vextent_near(args, alloc_flags);
3792 :
3793 1078594 : return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
3794 : }
3795 :
3796 : /* Ensure that the freelist is at full capacity. */
int
xfs_free_extent_fix_freelist(
	struct xfs_trans	*tp,	/* transaction the fixup runs in */
	struct xfs_perag	*pag,	/* AG whose freelist to fill */
	struct xfs_buf		**agbp)	/* out: AGF buffer on success */
{
	struct xfs_alloc_arg	args;
	int			error;

	memset(&args, 0, sizeof(struct xfs_alloc_arg));
	args.tp = tp;
	args.mp = tp->t_mountp;
	args.agno = pag->pag_agno;
	args.pag = pag;

	/*
	 * Validate that the AG number is legal - this enables us to detect
	 * and handle a silent filesystem corruption rather than crashing.
	 */
	if (args.agno >= args.mp->m_sb.sb_agcount)
		return -EFSCORRUPTED;

	error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
	if (error)
		return error;

	/* Hand the AGF buffer obtained by the freelist fixup to the caller. */
	*agbp = args.agbp;
	return 0;
}
3826 :
3827 : /*
3828 : * Free an extent.
3829 : * Just break up the extent address and hand off to xfs_free_ag_extent
3830 : * after fixing up the freelist.
3831 : */
int
__xfs_free_extent(
	struct xfs_trans	*tp,		/* transaction to free within */
	struct xfs_perag	*pag,		/* AG containing the extent */
	xfs_agblock_t		agbno,		/* AG-relative start block */
	xfs_extlen_t		len,		/* length in blocks, must be > 0 */
	const struct xfs_owner_info *oinfo,	/* owner info for the free */
	enum xfs_ag_resv_type	type,		/* AG reservation type (not AGFL) */
	bool			skip_discard)	/* suppress discard for this range */
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_buf		*agbp;
	struct xfs_agf		*agf;
	int			error;
	unsigned int		busy_flags = 0;

	ASSERT(len != 0);
	ASSERT(type != XFS_AG_RESV_AGFL);

	/* Error-injection point to exercise free-extent failure paths. */
	if (XFS_TEST_ERROR(false, mp,
			XFS_ERRTAG_FREE_EXTENT))
		return -EIO;

	error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
	if (error) {
		/* A corruption-class error marks the bnobt unhealthy. */
		if (xfs_metadata_is_sick(error))
			xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
		return error;
	}

	agf = agbp->b_addr;

	/* The start block must fall inside the AG. */
	if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
		xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
		error = -EFSCORRUPTED;
		goto err_release;
	}

	/* validate the extent size is legal now we have the agf locked */
	if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
		xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
		error = -EFSCORRUPTED;
		goto err_release;
	}

	error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
			type);
	if (error)
		goto err_release;

	/* Track the freed range as busy; skip_discard suppresses discard. */
	if (skip_discard)
		busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
	xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
	return 0;

err_release:
	/* Release the AGF buffer taken by the freelist fixup on failure. */
	xfs_trans_brelse(tp, agbp);
	return error;
}
3891 :
/* Context carried through the btree query to the per-record callback. */
struct xfs_alloc_query_range_info {
	xfs_alloc_query_range_fn	fn;	/* user callback per record */
	void				*priv;	/* opaque data passed to @fn */
};
3896 :
3897 : /* Format btree record and pass to our callback. */
3898 : STATIC int
3899 947148472 : xfs_alloc_query_range_helper(
3900 : struct xfs_btree_cur *cur,
3901 : const union xfs_btree_rec *rec,
3902 : void *priv)
3903 : {
3904 947148472 : struct xfs_alloc_query_range_info *query = priv;
3905 947148472 : struct xfs_alloc_rec_incore irec;
3906 947148472 : xfs_failaddr_t fa;
3907 :
3908 947148472 : xfs_alloc_btrec_to_irec(rec, &irec);
3909 947148672 : fa = xfs_alloc_check_irec(cur, &irec);
3910 947148870 : if (fa)
3911 0 : return xfs_alloc_complain_bad_rec(cur, fa, &irec);
3912 :
3913 947148870 : return query->fn(cur, &irec, query->priv);
3914 : }
3915 :
3916 : /* Find all free space within a given range of blocks. */
3917 : int
3918 21267 : xfs_alloc_query_range(
3919 : struct xfs_btree_cur *cur,
3920 : const struct xfs_alloc_rec_incore *low_rec,
3921 : const struct xfs_alloc_rec_incore *high_rec,
3922 : xfs_alloc_query_range_fn fn,
3923 : void *priv)
3924 : {
3925 21267 : union xfs_btree_irec low_brec = { .a = *low_rec };
3926 21267 : union xfs_btree_irec high_brec = { .a = *high_rec };
3927 21267 : struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn };
3928 :
3929 21267 : ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3930 21267 : return xfs_btree_query_range(cur, &low_brec, &high_brec,
3931 : xfs_alloc_query_range_helper, &query);
3932 : }
3933 :
3934 : /* Find all free space records. */
3935 : int
3936 420882 : xfs_alloc_query_all(
3937 : struct xfs_btree_cur *cur,
3938 : xfs_alloc_query_range_fn fn,
3939 : void *priv)
3940 : {
3941 420882 : struct xfs_alloc_query_range_info query;
3942 :
3943 420882 : ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3944 420882 : query.priv = priv;
3945 420882 : query.fn = fn;
3946 420882 : return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3947 : }
3948 :
3949 : /*
3950 : * Scan part of the keyspace of the free space and tell us if the area has no
3951 : * records, is fully mapped by records, or is partially filled.
3952 : */
3953 : int
3954 1265272807 : xfs_alloc_has_records(
3955 : struct xfs_btree_cur *cur,
3956 : xfs_agblock_t bno,
3957 : xfs_extlen_t len,
3958 : enum xbtree_recpacking *outcome)
3959 : {
3960 1265272807 : union xfs_btree_irec low;
3961 1265272807 : union xfs_btree_irec high;
3962 :
3963 1265272807 : memset(&low, 0, sizeof(low));
3964 1265272807 : low.a.ar_startblock = bno;
3965 1265272807 : memset(&high, 0xFF, sizeof(high));
3966 1265272807 : high.a.ar_startblock = bno + len - 1;
3967 :
3968 1265272807 : return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
3969 : }
3970 :
3971 : /*
3972 : * Walk all the blocks in the AGFL. The @walk_fn can return any negative
3973 : * error code or XFS_ITER_*.
3974 : */
int
xfs_agfl_walk(
	struct xfs_mount	*mp,
	struct xfs_agf		*agf,		/* AGF giving flfirst/fllast/flcount */
	struct xfs_buf		*agflbp,	/* buffer containing the AGFL */
	xfs_agfl_walk_fn	walk_fn,	/* called once per AGFL slot */
	void			*priv)		/* opaque data for @walk_fn */
{
	__be32			*agfl_bno;
	unsigned int		i;
	int			error;

	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	i = be32_to_cpu(agf->agf_flfirst);

	/* Nothing to walk in an empty AGFL. */
	if (agf->agf_flcount == cpu_to_be32(0))
		return 0;

	/* Otherwise, walk from first to last, wrapping as needed. */
	for (;;) {
		error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
		if (error)
			return error;
		if (i == be32_to_cpu(agf->agf_fllast))
			break;
		/* The AGFL is circular: past the last slot, wrap to slot 0. */
		if (++i == xfs_agfl_size(mp))
			i = 0;
	}

	return 0;
}
4007 :
4008 : int __init
4009 12 : xfs_extfree_intent_init_cache(void)
4010 : {
4011 12 : xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
4012 : sizeof(struct xfs_extent_free_item),
4013 : 0, 0, NULL);
4014 :
4015 12 : return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
4016 : }
4017 :
void
xfs_extfree_intent_destroy_cache(void)
{
	kmem_cache_destroy(xfs_extfree_item_cache);
	/* Clear the pointer so a stale cache cannot be used after teardown. */
	xfs_extfree_item_cache = NULL;
}
|