Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_format.h"
9 : #include "xfs_log_format.h"
10 : #include "xfs_shared.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_bit.h"
13 : #include "xfs_mount.h"
14 : #include "xfs_defer.h"
15 : #include "xfs_btree.h"
16 : #include "xfs_rmap.h"
17 : #include "xfs_alloc_btree.h"
18 : #include "xfs_alloc.h"
19 : #include "xfs_extent_busy.h"
20 : #include "xfs_errortag.h"
21 : #include "xfs_error.h"
22 : #include "xfs_trace.h"
23 : #include "xfs_trans.h"
24 : #include "xfs_buf_item.h"
25 : #include "xfs_log.h"
26 : #include "xfs_ag.h"
27 : #include "xfs_ag_resv.h"
28 : #include "xfs_bmap.h"
29 : #include "xfs_health.h"
30 :
31 : struct kmem_cache *xfs_extfree_item_cache;
32 :
33 : struct workqueue_struct *xfs_alloc_wq;
34 :
35 : #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b)))
36 :
37 : #define XFSA_FIXUP_BNO_OK 1
38 : #define XFSA_FIXUP_CNT_OK 2
39 :
40 : /*
41 : * Size of the AGFL. For CRC-enabled filesystes we steal a couple of slots in
42 : * the beginning of the block for a proper header with the location information
43 : * and CRC.
44 : */
45 : unsigned int
46 29618198 : xfs_agfl_size(
47 : struct xfs_mount *mp)
48 : {
49 1429695340 : unsigned int size = mp->m_sb.sb_sectsize;
50 :
51 29618198 : if (xfs_has_crc(mp))
52 1416570064 : size -= sizeof(struct xfs_agfl);
53 :
54 1429695340 : return size / sizeof(xfs_agblock_t);
55 : }
56 :
/*
 * AG block number of the refcount btree root: one past the last of the
 * rmap, finobt or inobt roots, whichever is present on this filesystem.
 */
unsigned int
xfs_refc_block(
	struct xfs_mount	*mp)
{
	unsigned int		prev;

	if (xfs_has_rmapbt(mp))
		prev = XFS_RMAP_BLOCK(mp);
	else if (xfs_has_finobt(mp))
		prev = XFS_FIBT_BLOCK(mp);
	else
		prev = XFS_IBT_BLOCK(mp);

	return prev + 1;
}
67 :
68 : xfs_extlen_t
69 75030 : xfs_prealloc_blocks(
70 : struct xfs_mount *mp)
71 : {
72 75030 : if (xfs_has_reflink(mp))
73 56764 : return xfs_refc_block(mp) + 1;
74 18266 : if (xfs_has_rmapbt(mp))
75 26 : return XFS_RMAP_BLOCK(mp) + 1;
76 18240 : if (xfs_has_finobt(mp))
77 18002 : return XFS_FIBT_BLOCK(mp) + 1;
78 238 : return XFS_IBT_BLOCK(mp) + 1;
79 : }
80 :
81 : /*
82 : * The number of blocks per AG that we withhold from xfs_mod_fdblocks to
83 : * guarantee that we can refill the AGFL prior to allocating space in a nearly
84 : * full AG. Although the space described by the free space btrees, the
85 : * blocks used by the freesp btrees themselves, and the blocks owned by the
86 : * AGFL are counted in the ondisk fdblocks, it's a mistake to let the ondisk
87 : * free space in the AG drop so low that the free space btrees cannot refill an
88 : * empty AGFL up to the minimum level. Rather than grind through empty AGs
89 : * until the fs goes down, we subtract this many AG blocks from the incore
90 : * fdblocks to ensure user allocation does not overcommit the space the
91 : * filesystem needs for the AGFLs. The rmap btree uses a per-AG reservation to
92 : * withhold space from xfs_mod_fdblocks, so we do not account for that here.
93 : */
94 : #define XFS_ALLOCBT_AGFL_RESERVE 4
95 :
96 : /*
97 : * Compute the number of blocks that we set aside to guarantee the ability to
98 : * refill the AGFL and handle a full bmap btree split.
99 : *
100 : * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of
101 : * AGF buffer (PV 947395), we place constraints on the relationship among
102 : * actual allocations for data blocks, freelist blocks, and potential file data
103 : * bmap btree blocks. However, these restrictions may result in no actual space
104 : * allocated for a delayed extent, for example, a data block in a certain AG is
105 : * allocated but there is no additional block for the additional bmap btree
106 : * block due to a split of the bmap btree of the file. The result of this may
107 : * lead to an infinite loop when the file gets flushed to disk and all delayed
108 : * extents need to be actually allocated. To get around this, we explicitly set
109 : * aside a few blocks which will not be reserved in delayed allocation.
110 : *
111 : * For each AG, we need to reserve enough blocks to replenish a totally empty
112 : * AGFL and 4 more to handle a potential split of the file's bmap btree.
113 : */
114 : unsigned int
115 75456 : xfs_alloc_set_aside(
116 : struct xfs_mount *mp)
117 : {
118 75456 : return mp->m_sb.sb_agcount * (XFS_ALLOCBT_AGFL_RESERVE + 4);
119 : }
120 :
121 : /*
122 : * When deciding how much space to allocate out of an AG, we limit the
123 : * allocation maximum size to the size of the AG. However, we cannot use all the
124 : * blocks in the AG - some are permanently used by metadata. These
125 : * blocks are generally:
126 : * - the AG superblock, AGF, AGI and AGFL
127 : * - the AGF (bno and cnt) and AGI btree root blocks, and optionally
128 : * the AGI free inode and rmap btree root blocks.
129 : * - blocks on the AGFL according to xfs_alloc_set_aside() limits
130 : * - the rmapbt root block
131 : *
132 : * The AG headers are sector sized, so the amount of space they take up is
133 : * dependent on filesystem geometry. The others are all single blocks.
134 : */
135 : unsigned int
136 60890 : xfs_alloc_ag_max_usable(
137 : struct xfs_mount *mp)
138 : {
139 60890 : unsigned int blocks;
140 :
141 60890 : blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */
142 60890 : blocks += XFS_ALLOCBT_AGFL_RESERVE;
143 60890 : blocks += 3; /* AGF, AGI btree root blocks */
144 60890 : if (xfs_has_finobt(mp))
145 60651 : blocks++; /* finobt root block */
146 60890 : if (xfs_has_rmapbt(mp))
147 44233 : blocks++; /* rmap root block */
148 60890 : if (xfs_has_reflink(mp))
149 44207 : blocks++; /* refcount root block */
150 :
151 60890 : return mp->m_sb.sb_agblocks - blocks;
152 : }
153 :
/*
 * Lookup the record equal to [bno, len] in the btree given by cur.
 *
 * On success, *stat is 1 if an exactly matching record was found and 0
 * otherwise.  The cursor's "active" flag mirrors that result so later
 * code can query it via xfs_alloc_cur_active().
 */
STATIC int			/* error */
xfs_alloc_lookup_eq(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	/* Stage the search key in the cursor before the generic lookup. */
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
	/* Remember whether this cursor now points at a live record. */
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}
172 :
/*
 * Lookup the first record greater than or equal to [bno, len]
 * in the btree given by cur.
 *
 * On success, *stat is 1 if such a record exists and 0 otherwise; the
 * cursor's "active" flag tracks that result for xfs_alloc_cur_active().
 */
int				/* error */
xfs_alloc_lookup_ge(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	/* Stage the search key in the cursor before the generic lookup. */
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
	/* Remember whether this cursor now points at a live record. */
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}
192 :
/*
 * Lookup the first record less than or equal to [bno, len]
 * in the btree given by cur.
 *
 * On success, *stat is 1 if such a record exists and 0 otherwise; the
 * cursor's "active" flag tracks that result for xfs_alloc_cur_active().
 */
int				/* error */
xfs_alloc_lookup_le(
	struct xfs_btree_cur	*cur,	/* btree cursor */
	xfs_agblock_t		bno,	/* starting block of extent */
	xfs_extlen_t		len,	/* length of extent */
	int			*stat)	/* success/failure */
{
	int			error;

	/* Stage the search key in the cursor before the generic lookup. */
	cur->bc_rec.a.ar_startblock = bno;
	cur->bc_rec.a.ar_blockcount = len;
	error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
	/* Remember whether this cursor now points at a live record. */
	cur->bc_ag.abt.active = (*stat == 1);
	return error;
}
211 :
212 : static inline bool
213 11187183782 : xfs_alloc_cur_active(
214 : struct xfs_btree_cur *cur)
215 : {
216 11187183782 : return cur && cur->bc_ag.abt.active;
217 : }
218 :
219 : /*
220 : * Update the record referred to by cur to the value given
221 : * by [bno, len].
222 : * This either works (return 0) or gets an EFSCORRUPTED error.
223 : */
224 : STATIC int /* error */
225 105251599 : xfs_alloc_update(
226 : struct xfs_btree_cur *cur, /* btree cursor */
227 : xfs_agblock_t bno, /* starting block of extent */
228 : xfs_extlen_t len) /* length of extent */
229 : {
230 105251599 : union xfs_btree_rec rec;
231 :
232 105251599 : rec.alloc.ar_startblock = cpu_to_be32(bno);
233 105251599 : rec.alloc.ar_blockcount = cpu_to_be32(len);
234 105251599 : return xfs_btree_update(cur, &rec);
235 : }
236 :
/*
 * Convert the ondisk btree record to its incore representation.
 * Both fields are stored big-endian on disk.
 */
void
xfs_alloc_btrec_to_irec(
	const union xfs_btree_rec	*rec,
	struct xfs_alloc_rec_incore	*irec)
{
	irec->ar_startblock = be32_to_cpu(rec->alloc.ar_startblock);
	irec->ar_blockcount = be32_to_cpu(rec->alloc.ar_blockcount);
}
246 :
247 : inline xfs_failaddr_t
248 9915404526 : xfs_alloc_check_perag_irec(
249 : struct xfs_perag *pag,
250 : const struct xfs_alloc_rec_incore *irec)
251 : {
252 9915404526 : if (irec->ar_blockcount == 0)
253 0 : return __this_address;
254 :
255 : /* check for valid extent range, including overflow */
256 9915404526 : if (!xfs_verify_agbext(pag, irec->ar_startblock, irec->ar_blockcount))
257 0 : return __this_address;
258 :
259 : return NULL;
260 : }
261 :
/*
 * Simple checks for free space records.  Defers to the perag-aware
 * checker using the AG attached to the cursor.
 */
xfs_failaddr_t
xfs_alloc_check_irec(
	struct xfs_btree_cur		*cur,
	const struct xfs_alloc_rec_incore *irec)
{
	return xfs_alloc_check_perag_irec(cur->bc_ag.pag, irec);
}
270 :
/*
 * Complain about a corrupt free space record and mark the btree sick.
 * Always returns -EFSCORRUPTED so callers can return this directly.
 */
static inline int
xfs_alloc_complain_bad_rec(
	struct xfs_btree_cur		*cur,
	xfs_failaddr_t			fa,
	const struct xfs_alloc_rec_incore *irec)
{
	struct xfs_mount		*mp = cur->bc_mp;

	xfs_warn(mp,
		"%s Freespace BTree record corruption in AG %d detected at %pS!",
		cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size",
		cur->bc_ag.pag->pag_agno, fa);
	xfs_warn(mp,
		"start block 0x%x block count 0x%x", irec->ar_startblock,
		irec->ar_blockcount);
	xfs_btree_mark_sick(cur);
	return -EFSCORRUPTED;
}
289 :
290 : /*
291 : * Get the data from the pointed-to record.
292 : */
293 : int /* error */
294 7172878875 : xfs_alloc_get_rec(
295 : struct xfs_btree_cur *cur, /* btree cursor */
296 : xfs_agblock_t *bno, /* output: starting block of extent */
297 : xfs_extlen_t *len, /* output: length of extent */
298 : int *stat) /* output: success/failure */
299 : {
300 7172878875 : struct xfs_alloc_rec_incore irec;
301 7172878875 : union xfs_btree_rec *rec;
302 7172878875 : xfs_failaddr_t fa;
303 7172878875 : int error;
304 :
305 7172878875 : error = xfs_btree_get_rec(cur, &rec, stat);
306 7160844992 : if (error || !(*stat))
307 : return error;
308 :
309 7161651219 : xfs_alloc_btrec_to_irec(rec, &irec);
310 7161651219 : fa = xfs_alloc_check_irec(cur, &irec);
311 7158011172 : if (fa)
312 0 : return xfs_alloc_complain_bad_rec(cur, fa, &irec);
313 :
314 7158011172 : *bno = irec.ar_startblock;
315 7158011172 : *len = irec.ar_blockcount;
316 7158011172 : return 0;
317 : }
318 :
/*
 * Compute aligned version of the found extent.
 * Takes alignment and min length into account.
 *
 * Returns true if any portion of the found extent was busy (the caller may
 * need to wait on the returned busy_gen before reusing the space).
 */
STATIC bool
xfs_alloc_compute_aligned(
	xfs_alloc_arg_t	*args,		/* allocation argument structure */
	xfs_agblock_t	foundbno,	/* starting block in found extent */
	xfs_extlen_t	foundlen,	/* length in found extent */
	xfs_agblock_t	*resbno,	/* result block number */
	xfs_extlen_t	*reslen,	/* result length */
	unsigned	*busy_gen)
{
	xfs_agblock_t	bno = foundbno;
	xfs_extlen_t	len = foundlen;
	xfs_extlen_t	diff;
	bool		busy;

	/* Trim busy sections out of found extent */
	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);

	/*
	 * If we have a largish extent that happens to start before min_agbno,
	 * see if we can shift it into range...
	 */
	if (bno < args->min_agbno && bno + len > args->min_agbno) {
		diff = args->min_agbno - bno;
		if (len > diff) {
			bno += diff;
			len -= diff;
		}
	}

	if (args->alignment > 1 && len >= args->minlen) {
		xfs_agblock_t	aligned_bno = roundup(bno, args->alignment);

		diff = aligned_bno - bno;

		/*
		 * Rounding up may consume the whole extent; report length 0
		 * in that case so the caller rejects it.
		 */
		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;
		*reslen = len;
	}

	return busy;
}
366 :
/*
 * Compute best start block and diff for "near" allocations.
 * freelen >= wantlen already checked by caller.
 *
 * Sets *newbnop to the chosen start block within [freebno, freebno+freelen),
 * or NULLAGBLOCK if no aligned candidate fits, and returns the absolute
 * distance between the chosen block and wantbno (0 for NULLAGBLOCK).
 */
STATIC xfs_extlen_t			/* difference value (absolute) */
xfs_alloc_compute_diff(
	xfs_agblock_t	wantbno,	/* target starting block */
	xfs_extlen_t	wantlen,	/* target length */
	xfs_extlen_t	alignment,	/* target alignment */
	int		datatype,	/* are we allocating data? */
	xfs_agblock_t	freebno,	/* freespace's starting block */
	xfs_extlen_t	freelen,	/* freespace's length */
	xfs_agblock_t	*newbnop)	/* result: best start block from free */
{
	xfs_agblock_t	freeend;	/* end of freespace extent */
	xfs_agblock_t	newbno1;	/* return block number */
	xfs_agblock_t	newbno2;	/* other new block number */
	xfs_extlen_t	newlen1=0;	/* length with newbno1 */
	xfs_extlen_t	newlen2=0;	/* length with newbno2 */
	xfs_agblock_t	wantend;	/* end of target extent */
	bool		userdata = datatype & XFS_ALLOC_USERDATA;

	ASSERT(freelen >= wantlen);
	freeend = freebno + freelen;
	wantend = wantbno + wantlen;
	/*
	 * We want to allocate from the start of a free extent if it is past
	 * the desired block or if we are allocating user data and the free
	 * extent is before desired block. The second case is there to allow
	 * for contiguous allocation from the remaining free space if the file
	 * grows in the short term.
	 */
	if (freebno >= wantbno || (userdata && freeend < wantend)) {
		if ((newbno1 = roundup(freebno, alignment)) >= freeend)
			newbno1 = NULLAGBLOCK;
	} else if (freeend >= wantend && alignment > 1) {
		/*
		 * The target sits inside the free extent and alignment
		 * matters: try the aligned blocks on either side of wantbno
		 * and keep whichever yields the longer usable length, with
		 * ties broken by distance to wantbno.
		 */
		newbno1 = roundup(wantbno, alignment);
		newbno2 = newbno1 - alignment;
		if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
		else
			newlen1 = XFS_EXTLEN_MIN(wantlen, freeend - newbno1);
		if (newbno2 < freebno)
			newbno2 = NULLAGBLOCK;
		else
			newlen2 = XFS_EXTLEN_MIN(wantlen, freeend - newbno2);
		if (newbno1 != NULLAGBLOCK && newbno2 != NULLAGBLOCK) {
			if (newlen1 < newlen2 ||
			    (newlen1 == newlen2 &&
			     XFS_ABSDIFF(newbno1, wantbno) >
			     XFS_ABSDIFF(newbno2, wantbno)))
				newbno1 = newbno2;
		} else if (newbno2 != NULLAGBLOCK)
			newbno1 = newbno2;
	} else if (freeend >= wantend) {
		/* No alignment constraint and the target fits: use it. */
		newbno1 = wantbno;
	} else if (alignment > 1) {
		/* Place wantlen blocks as late in the extent as alignment
		 * permits. */
		newbno1 = roundup(freeend - wantlen, alignment);
		if (newbno1 > freeend - wantlen &&
		    newbno1 - alignment >= freebno)
			newbno1 -= alignment;
		else if (newbno1 >= freeend)
			newbno1 = NULLAGBLOCK;
	} else
		/* Unaligned: take the tail end of the free extent. */
		newbno1 = freeend - wantlen;
	*newbnop = newbno1;
	return newbno1 == NULLAGBLOCK ? 0 : XFS_ABSDIFF(newbno1, wantbno);
}
435 :
/*
 * Fix up the length, based on mod and prod.
 * len should be k * prod + mod for some k.
 * If len is too small it is returned unchanged.
 * If len hits maxlen it is left alone.
 */
STATIC void
xfs_alloc_fix_len(
	xfs_alloc_arg_t	*args)		/* allocation argument structure */
{
	xfs_extlen_t	k;
	xfs_extlen_t	rlen;

	ASSERT(args->mod < args->prod);
	rlen = args->len;
	ASSERT(rlen >= args->minlen);
	ASSERT(rlen <= args->maxlen);
	/* No constraint in effect, or the length is already acceptable. */
	if (args->prod <= 1 || rlen < args->mod || rlen == args->maxlen ||
	    (args->mod == 0 && rlen < args->prod))
		return;
	k = rlen % args->prod;
	if (k == args->mod)
		return;
	/* Reduce rlen to the largest value <= rlen with rlen % prod == mod. */
	if (k > args->mod)
		rlen = rlen - (k - args->mod);
	else
		rlen = rlen - args->prod + (args->mod - k);
	/* casts to (int) catch length underflows */
	if ((int)rlen < (int)args->minlen)
		return;
	ASSERT(rlen >= args->minlen && rlen <= args->maxlen);
	ASSERT(rlen % args->prod == args->mod);
	ASSERT(args->pag->pagf_freeblks + args->pag->pagf_flcount >=
		rlen + args->minleft);
	args->len = rlen;
}
472 :
/*
 * Update the two btrees, logically removing from freespace the extent
 * starting at rbno, rlen blocks. The extent is contained within the
 * actual (current) free extent fbno for flen blocks.
 * Flags are passed in indicating whether the cursors are set to the
 * relevant records.
 *
 * Both the by-size (cntbt) and by-block (bnobt) trees must stay in sync:
 * the old [fbno, flen] record is deleted from each and up to two new
 * records covering the leftover space are inserted.
 */
STATIC int				/* error code */
xfs_alloc_fixup_trees(
	struct xfs_btree_cur *cnt_cur,	/* cursor for by-size btree */
	struct xfs_btree_cur *bno_cur,	/* cursor for by-block btree */
	xfs_agblock_t	fbno,		/* starting block of free extent */
	xfs_extlen_t	flen,		/* length of free extent */
	xfs_agblock_t	rbno,		/* starting block of returned extent */
	xfs_extlen_t	rlen,		/* length of returned extent */
	int		flags)		/* flags, XFSA_FIXUP_... */
{
	int		error;		/* error code */
	int		i;		/* operation results */
	xfs_agblock_t	nfbno1;		/* first new free startblock */
	xfs_agblock_t	nfbno2;		/* second new free startblock */
	xfs_extlen_t	nflen1=0;	/* first new free length */
	xfs_extlen_t	nflen2=0;	/* second new free length */
	struct xfs_mount *mp;

	mp = cnt_cur->bc_mp;

	/*
	 * Look up the record in the by-size tree if necessary.
	 */
	if (flags & XFSA_FIXUP_CNT_OK) {
#ifdef DEBUG
		/* Cursor claimed positioned; cross-check it in debug builds. */
		if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	/*
	 * Look up the record in the by-block tree if necessary.
	 */
	if (flags & XFSA_FIXUP_BNO_OK) {
#ifdef DEBUG
		/* Cursor claimed positioned; cross-check it in debug builds. */
		if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp,
				   i != 1 ||
				   nfbno1 != fbno ||
				   nflen1 != flen)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
#endif
	} else {
		if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}

#ifdef DEBUG
	/*
	 * The two trees describe the same free space, so single-level trees
	 * must hold the same number of records.
	 */
	if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) {
		struct xfs_btree_block	*bnoblock;
		struct xfs_btree_block	*cntblock;

		bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_levels[0].bp);
		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_levels[0].bp);

		if (XFS_IS_CORRUPT(mp,
				   bnoblock->bb_numrecs !=
				   cntblock->bb_numrecs)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}
#endif

	/*
	 * Deal with all four cases: the allocated record is contained
	 * within the freespace record, so we can have new freespace
	 * at either (or both) end, or no freespace remaining.
	 */
	if (rbno == fbno && rlen == flen)
		nfbno1 = nfbno2 = NULLAGBLOCK;
	else if (rbno == fbno) {
		nfbno1 = rbno + rlen;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else if (rbno + rlen == fbno + flen) {
		nfbno1 = fbno;
		nflen1 = flen - rlen;
		nfbno2 = NULLAGBLOCK;
	} else {
		nfbno1 = fbno;
		nflen1 = rbno - fbno;
		nfbno2 = rbno + rlen;
		nflen2 = (fbno + flen) - nfbno2;
	}
	/*
	 * Delete the entry from the by-size btree.
	 */
	if ((error = xfs_btree_delete(cnt_cur, &i)))
		return error;
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cnt_cur);
		return -EFSCORRUPTED;
	}
	/*
	 * Add new by-size btree entry(s).  Each insert first confirms via an
	 * EQ lookup (i == 0) that no duplicate record exists.
	 */
	if (nfbno1 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	if (nfbno2 != NULLAGBLOCK) {
		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(cnt_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cnt_cur);
			return -EFSCORRUPTED;
		}
	}
	/*
	 * Fix up the by-block btree entry(s).
	 */
	if (nfbno1 == NULLAGBLOCK) {
		/*
		 * No remaining freespace, just delete the by-block tree entry.
		 */
		if ((error = xfs_btree_delete(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	} else {
		/*
		 * Update the by-block entry to start later|be shorter.
		 */
		if ((error = xfs_alloc_update(bno_cur, nfbno1, nflen1)))
			return error;
	}
	if (nfbno2 != NULLAGBLOCK) {
		/*
		 * 2 resulting free entries, need to add one.
		 */
		if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 0)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
		if ((error = xfs_btree_insert(bno_cur, &i)))
			return error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(bno_cur);
			return -EFSCORRUPTED;
		}
	}
	return 0;
}
664 :
/*
 * We do not verify the AGFL contents against AGF-based index counters here,
 * even though we may have access to the perag that contains shadow copies. We
 * don't know if the AGF based counters have been checked, and if they have they
 * still may be inconsistent because they haven't yet been reset on the first
 * allocation after the AGF has been read in.
 *
 * This means we can only check that all agfl entries contain valid or null
 * values because we can't reliably determine the active range to exclude
 * NULLAGBNO as a valid value.
 *
 * However, we can't even do that for v4 format filesystems because there are
 * old versions of mkfs out there that do not initialise the AGFL to known,
 * verifiable values. Hence we can't tell the difference between an AGFL block
 * allocated by mkfs and a corrupted AGFL block here on v4 filesystems.
 *
 * As a result, we can only fully validate AGFL block numbers when we pull them
 * from the freelist in xfs_alloc_get_freelist().
 */
static xfs_failaddr_t
xfs_agfl_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount *mp = bp->b_mount;
	struct xfs_agfl	*agfl = XFS_BUF_TO_AGFL(bp);
	__be32		*agfl_bno = xfs_buf_to_agfl_bno(bp);
	int		i;

	/* v4 AGFLs have no self-describing header to verify; see above. */
	if (!xfs_has_crc(mp))
		return NULL;

	if (!xfs_verify_magic(bp, agfl->agfl_magicnum))
		return __this_address;
	if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid))
		return __this_address;
	/*
	 * during growfs operations, the perag is not fully initialised,
	 * so we can't use it for any useful checking. growfs ensures we can't
	 * use it by using uncached buffers that don't have the perag attached
	 * so we can detect and avoid this problem.
	 */
	if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
		return __this_address;

	/* Every slot must hold NULLAGBLOCK or a block number inside the AG. */
	for (i = 0; i < xfs_agfl_size(mp); i++) {
		if (be32_to_cpu(agfl_bno[i]) != NULLAGBLOCK &&
		    be32_to_cpu(agfl_bno[i]) >= mp->m_sb.sb_agblocks)
			return __this_address;
	}

	if (!xfs_log_check_lsn(mp, be64_to_cpu(XFS_BUF_TO_AGFL(bp)->agfl_lsn)))
		return __this_address;
	return NULL;
}
719 :
720 : static void
721 183049 : xfs_agfl_read_verify(
722 : struct xfs_buf *bp)
723 : {
724 183049 : struct xfs_mount *mp = bp->b_mount;
725 183049 : xfs_failaddr_t fa;
726 :
727 : /*
728 : * There is no verification of non-crc AGFLs because mkfs does not
729 : * initialise the AGFL to zero or NULL. Hence the only valid part of the
730 : * AGFL is what the AGF says is active. We can't get to the AGF, so we
731 : * can't verify just those entries are valid.
732 : */
733 183049 : if (!xfs_has_crc(mp))
734 : return;
735 :
736 182972 : if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF))
737 640 : xfs_verifier_error(bp, -EFSBADCRC, __this_address);
738 : else {
739 182332 : fa = xfs_agfl_verify(bp);
740 182332 : if (fa)
741 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
742 : }
743 : }
744 :
static void
xfs_agfl_write_verify(
	struct xfs_buf	*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	xfs_failaddr_t		fa;

	/* no verification of non-crc AGFLs */
	if (!xfs_has_crc(mp))
		return;

	fa = xfs_agfl_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	/* Stamp the last-modification LSN before computing the checksum. */
	if (bip)
		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);
}
768 :
/* Buffer operations for AGFL blocks; verify callbacks defined above. */
const struct xfs_buf_ops xfs_agfl_buf_ops = {
	.name = "xfs_agfl",
	.magic = { cpu_to_be32(XFS_AGFL_MAGIC), cpu_to_be32(XFS_AGFL_MAGIC) },
	.verify_read = xfs_agfl_read_verify,
	.verify_write = xfs_agfl_write_verify,
	.verify_struct = xfs_agfl_verify,
};
776 :
777 : /*
778 : * Read in the allocation group free block array.
779 : */
780 : int
781 593404781 : xfs_alloc_read_agfl(
782 : struct xfs_perag *pag,
783 : struct xfs_trans *tp,
784 : struct xfs_buf **bpp)
785 : {
786 593404781 : struct xfs_mount *mp = pag->pag_mount;
787 593404781 : struct xfs_buf *bp;
788 593404781 : int error;
789 :
790 1780214343 : error = xfs_trans_read_buf(
791 593404781 : mp, tp, mp->m_ddev_targp,
792 593404781 : XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
793 593404781 : XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
794 593495449 : if (xfs_metadata_is_sick(error))
795 640 : xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
796 593495449 : if (error)
797 : return error;
798 593494748 : xfs_buf_set_ref(bp, XFS_AGFL_REF);
799 593467885 : *bpp = bp;
800 593467885 : return 0;
801 : }
802 :
/*
 * Apply a free block count delta of @len to both the in-core perag counter
 * and the ondisk AGF, then log the AGF change.  Returns -EFSCORRUPTED if
 * the resulting count exceeds the AG length.
 */
STATIC int
xfs_alloc_update_counters(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	long			len)
{
	struct xfs_agf		*agf = agbp->b_addr;

	agbp->b_pag->pagf_freeblks += len;
	be32_add_cpu(&agf->agf_freeblks, len);

	/* More free blocks than blocks in the AG means corruption. */
	if (unlikely(be32_to_cpu(agf->agf_freeblks) >
		     be32_to_cpu(agf->agf_length))) {
		xfs_buf_mark_corrupt(agbp);
		xfs_ag_mark_sick(agbp->b_pag, XFS_SICK_AG_AGF);
		return -EFSCORRUPTED;
	}

	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
	return 0;
}
824 :
825 : /*
826 : * Block allocation algorithm and data structures.
827 : */
/*
 * State for one extent allocation search: the btree cursors used to walk
 * the free space trees plus the best candidate extent found so far.
 */
struct xfs_alloc_cur {
	struct xfs_btree_cur	*cnt;	/* btree cursors */
	struct xfs_btree_cur	*bnolt;	/* bnobt cursor searching left */
	struct xfs_btree_cur	*bnogt;	/* bnobt cursor searching right */
	xfs_extlen_t		cur_len;/* current search length */
	xfs_agblock_t		rec_bno;/* extent startblock */
	xfs_extlen_t		rec_len;/* extent length */
	xfs_agblock_t		bno;	/* alloc bno */
	xfs_extlen_t		len;	/* alloc len */
	xfs_extlen_t		diff;	/* diff from search bno */
	unsigned int		busy_gen;/* busy state */
	bool			busy;	/* any candidate overlapped busy space */
};
841 :
/*
 * Set up cursors, etc. in the extent allocation cursor. This function can be
 * called multiple times to reset an initialized structure without having to
 * reallocate cursors.
 *
 * Returns -ENOSPC (with all three cursors still set up) if no maxlen sized
 * extent exists in the cntbt, so the caller can retry a smaller allocation.
 */
static int
xfs_alloc_cur_setup(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	int			error;
	int			i;

	/* Reset the search state; existing cursors are reused unchanged. */
	acur->cur_len = args->maxlen;
	acur->rec_bno = 0;
	acur->rec_len = 0;
	acur->bno = 0;
	acur->len = 0;
	acur->diff = -1;
	acur->busy = false;
	acur->busy_gen = 0;

	/*
	 * Perform an initial cntbt lookup to check for availability of maxlen
	 * extents. If this fails, we'll return -ENOSPC to signal the caller to
	 * attempt a small allocation.
	 */
	if (!acur->cnt)
		acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_CNT);
	error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
	if (error)
		return error;

	/*
	 * Allocate the bnobt left and right search cursors.
	 */
	if (!acur->bnolt)
		acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	if (!acur->bnogt)
		acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
					args->agbp, args->pag, XFS_BTNUM_BNO);
	return i == 1 ? 0 : -ENOSPC;
}
887 :
888 : static void
889 78934083 : xfs_alloc_cur_close(
890 : struct xfs_alloc_cur *acur,
891 : bool error)
892 : {
893 78934083 : int cur_error = XFS_BTREE_NOERROR;
894 :
895 78934083 : if (error)
896 3013 : cur_error = XFS_BTREE_ERROR;
897 :
898 78934083 : if (acur->cnt)
899 78934083 : xfs_btree_del_cursor(acur->cnt, cur_error);
900 78951306 : if (acur->bnolt)
901 78949716 : xfs_btree_del_cursor(acur->bnolt, cur_error);
902 78954897 : if (acur->bnogt)
903 78953307 : xfs_btree_del_cursor(acur->bnogt, cur_error);
904 78953581 : acur->cnt = acur->bnolt = acur->bnogt = NULL;
905 78953581 : }
906 :
/*
 * Check an extent for allocation and track the best available candidate in the
 * allocation structure. The cursor is deactivated if it has entered an out of
 * range state based on allocation arguments. Optionally return the extent
 * geometry and allocation status if requested by the caller.
 *
 * *new is set to 1 only when the record under @cur becomes the new best
 * candidate; otherwise it is 0.
 */
static int
xfs_alloc_cur_check(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	struct xfs_btree_cur	*cur,
	int			*new)
{
	int			error, i;
	xfs_agblock_t		bno, bnoa, bnew;
	xfs_extlen_t		len, lena, diff = -1;
	bool			busy;
	unsigned		busy_gen = 0;
	bool			deactivate = false;
	/* bnobt and cntbt cursors have different out-of-range semantics */
	bool			isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;

	*new = 0;

	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		return -EFSCORRUPTED;
	}

	/*
	 * Check minlen and deactivate a cntbt cursor if out of acceptable size
	 * range (i.e., walking backwards looking for a minlen extent).
	 */
	if (len < args->minlen) {
		deactivate = !isbnobt;
		goto out;
	}

	/* Trim busy blocks out of the record; bnoa/lena is the usable part. */
	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
					 &busy_gen);
	acur->busy |= busy;
	if (busy)
		acur->busy_gen = busy_gen;
	/* deactivate a bnobt cursor outside of locality range */
	if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
		deactivate = isbnobt;
		goto out;
	}
	if (lena < args->minlen)
		goto out;

	args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
	xfs_alloc_fix_len(args);
	ASSERT(args->len >= args->minlen);
	/* a shorter extent than the current best is never an improvement */
	if (args->len < acur->len)
		goto out;

	/*
	 * We have an aligned record that satisfies minlen and beats or matches
	 * the candidate extent size. Compare locality for near allocation mode.
	 */
	diff = xfs_alloc_compute_diff(args->agbno, args->len,
				      args->alignment, args->datatype,
				      bnoa, lena, &bnew);
	if (bnew == NULLAGBLOCK)
		goto out;

	/*
	 * Deactivate a bnobt cursor with worse locality than the current best.
	 */
	if (diff > acur->diff) {
		deactivate = isbnobt;
		goto out;
	}

	ASSERT(args->len > acur->len ||
	       (args->len == acur->len && diff <= acur->diff));
	acur->rec_bno = bno;
	acur->rec_len = len;
	acur->bno = bnew;
	acur->len = args->len;
	acur->diff = diff;
	*new = 1;

	/*
	 * We're done if we found a perfect allocation. This only deactivates
	 * the current cursor, but this is just an optimization to terminate a
	 * cntbt search that otherwise runs to the edge of the tree.
	 */
	if (acur->diff == 0 && acur->len == args->maxlen)
		deactivate = true;
out:
	if (deactivate)
		cur->bc_ag.abt.active = false;
	trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
				  *new);
	return 0;
}
1007 :
1008 : /*
1009 : * Complete an allocation of a candidate extent. Remove the extent from both
1010 : * trees and update the args structure.
1011 : */
/*
 * Complete an allocation of a candidate extent. Remove the extent from both
 * trees and update the args structure.
 *
 * Requires a candidate already recorded in @acur (rec_bno/rec_len is the
 * on-disk free record, bno/len is the allocated sub-extent). On success
 * args->agbno/len describe the allocation.
 */
STATIC int
xfs_alloc_cur_finish(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur)
{
	/* only referenced by the ASSERT below on DEBUG builds */
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	int			error;

	ASSERT(acur->cnt && acur->bnolt);
	ASSERT(acur->bno >= acur->rec_bno);
	ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
	ASSERT(acur->rec_bno + acur->rec_len <= be32_to_cpu(agf->agf_length));

	/* carve [bno, len) out of the free record in both btrees */
	error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
				      acur->rec_len, acur->bno, acur->len, 0);
	if (error)
		return error;

	args->agbno = acur->bno;
	args->len = acur->len;
	args->wasfromfl = 0;

	trace_xfs_alloc_cur(args);
	return 0;
}
1037 :
/*
 * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
 * bno optimized lookup to search for extents with ideal size and locality.
 *
 * Performs one step of the cntbt side of the NEAR search: look up the closest
 * extent to args->agbno at the current size key, check it (and possibly its
 * predecessor) as a candidate, then advance acur->cur_len for the next
 * iteration. Returns 0 with the cursor possibly deactivated, or an errno.
 */
STATIC int
xfs_alloc_cntbt_iter(
	struct xfs_alloc_arg		*args,
	struct xfs_alloc_cur		*acur)
{
	struct xfs_btree_cur	*cur = acur->cnt;
	xfs_agblock_t		bno;
	xfs_extlen_t		len, cur_len;
	int			error;
	int			i;

	/* nothing to do once the cntbt cursor has been deactivated */
	if (!xfs_alloc_cur_active(cur))
		return 0;

	/* locality optimized lookup */
	cur_len = acur->cur_len;
	error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
	if (error)
		return error;
	if (i == 0)
		return 0;
	error = xfs_alloc_get_rec(cur, &bno, &len, &i);
	if (error)
		return error;

	/* check the current record and update search length from it */
	error = xfs_alloc_cur_check(args, acur, cur, &i);
	if (error)
		return error;
	ASSERT(len >= acur->cur_len);
	acur->cur_len = len;

	/*
	 * We looked up the first record >= [agbno, len] above. The agbno is a
	 * secondary key and so the current record may lie just before or after
	 * agbno. If it is past agbno, check the previous record too so long as
	 * the length matches as it may be closer. Don't check a smaller record
	 * because that could deactivate our cursor.
	 */
	if (bno > args->agbno) {
		error = xfs_btree_decrement(cur, 0, &i);
		if (!error && i) {
			error = xfs_alloc_get_rec(cur, &bno, &len, &i);
			if (!error && i && len == acur->cur_len)
				error = xfs_alloc_cur_check(args, acur, cur,
							    &i);
		}
		if (error)
			return error;
	}

	/*
	 * Increment the search key until we find at least one allocation
	 * candidate or if the extent we found was larger. Otherwise, double the
	 * search key to optimize the search. Efficiency is more important here
	 * than absolute best locality.
	 */
	cur_len <<= 1;
	if (!acur->len || acur->cur_len >= cur_len)
		acur->cur_len++;
	else
		acur->cur_len = cur_len;

	return error;
}
1107 :
1108 : /*
1109 : * Deal with the case where only small freespaces remain. Either return the
1110 : * contents of the last freespace record, or allocate space from the freelist if
1111 : * there is nothing in the tree.
1112 : */
1113 : STATIC int /* error */
1114 1181871 : xfs_alloc_ag_vextent_small(
1115 : struct xfs_alloc_arg *args, /* allocation argument structure */
1116 : struct xfs_btree_cur *ccur, /* optional by-size cursor */
1117 : xfs_agblock_t *fbnop, /* result block number */
1118 : xfs_extlen_t *flenp, /* result length */
1119 : int *stat) /* status: 0-freelist, 1-normal/none */
1120 : {
1121 1181871 : struct xfs_agf *agf = args->agbp->b_addr;
1122 1181871 : int error = 0;
1123 1181871 : xfs_agblock_t fbno = NULLAGBLOCK;
1124 1181871 : xfs_extlen_t flen = 0;
1125 1181871 : int i = 0;
1126 :
1127 : /*
1128 : * If a cntbt cursor is provided, try to allocate the largest record in
1129 : * the tree. Try the AGFL if the cntbt is empty, otherwise fail the
1130 : * allocation. Make sure to respect minleft even when pulling from the
1131 : * freelist.
1132 : */
1133 1181871 : if (ccur)
1134 1181871 : error = xfs_btree_decrement(ccur, 0, &i);
1135 1181872 : if (error)
1136 0 : goto error;
1137 1181872 : if (i) {
1138 1181872 : error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
1139 1181870 : if (error)
1140 0 : goto error;
1141 1181870 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1142 0 : xfs_btree_mark_sick(ccur);
1143 0 : error = -EFSCORRUPTED;
1144 0 : goto error;
1145 : }
1146 1181870 : goto out;
1147 : }
1148 :
1149 0 : if (args->minlen != 1 || args->alignment != 1 ||
1150 0 : args->resv == XFS_AG_RESV_AGFL ||
1151 0 : be32_to_cpu(agf->agf_flcount) <= args->minleft)
1152 0 : goto out;
1153 :
1154 0 : error = xfs_alloc_get_freelist(args->pag, args->tp, args->agbp,
1155 : &fbno, 0);
1156 0 : if (error)
1157 0 : goto error;
1158 0 : if (fbno == NULLAGBLOCK)
1159 0 : goto out;
1160 :
1161 0 : xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
1162 0 : (args->datatype & XFS_ALLOC_NOBUSY));
1163 :
1164 0 : if (args->datatype & XFS_ALLOC_USERDATA) {
1165 0 : struct xfs_buf *bp;
1166 :
1167 0 : error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
1168 0 : XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
1169 0 : args->mp->m_bsize, 0, &bp);
1170 0 : if (error)
1171 0 : goto error;
1172 0 : xfs_trans_binval(args->tp, bp);
1173 : }
1174 0 : *fbnop = args->agbno = fbno;
1175 0 : *flenp = args->len = 1;
1176 0 : if (XFS_IS_CORRUPT(args->mp, fbno >= be32_to_cpu(agf->agf_length))) {
1177 0 : xfs_btree_mark_sick(ccur);
1178 0 : error = -EFSCORRUPTED;
1179 0 : goto error;
1180 : }
1181 0 : args->wasfromfl = 1;
1182 0 : trace_xfs_alloc_small_freelist(args);
1183 :
1184 : /*
1185 : * If we're feeding an AGFL block to something that doesn't live in the
1186 : * free space, we need to clear out the OWN_AG rmap.
1187 : */
1188 0 : error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1,
1189 : &XFS_RMAP_OINFO_AG);
1190 0 : if (error)
1191 0 : goto error;
1192 :
1193 0 : *stat = 0;
1194 0 : return 0;
1195 :
1196 1181870 : out:
1197 : /*
1198 : * Can't do the allocation, give up.
1199 : */
1200 1181870 : if (flen < args->minlen) {
1201 0 : args->agbno = NULLAGBLOCK;
1202 0 : trace_xfs_alloc_small_notenough(args);
1203 0 : flen = 0;
1204 : }
1205 1181870 : *fbnop = fbno;
1206 1181870 : *flenp = flen;
1207 1181870 : *stat = 1;
1208 1181870 : trace_xfs_alloc_small_done(args);
1209 1181870 : return 0;
1210 :
1211 0 : error:
1212 0 : trace_xfs_alloc_small_error(args);
1213 0 : return error;
1214 : }
1215 :
/*
 * Allocate a variable extent at exactly agno/bno.
 * Extent's length (returned in *len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block (bno), or NULLAGBLOCK if we can't do it.
 */
STATIC int			/* error */
xfs_alloc_ag_vextent_exact(
	xfs_alloc_arg_t		*args)	/* allocation argument structure */
{
	/* only referenced by the ASSERT below on DEBUG builds */
	struct xfs_agf __maybe_unused *agf = args->agbp->b_addr;
	struct xfs_btree_cur	*bno_cur;/* by block-number btree cursor */
	struct xfs_btree_cur	*cnt_cur;/* by count btree cursor */
	int			error;
	xfs_agblock_t		fbno;	/* start block of found extent */
	xfs_extlen_t		flen;	/* length of found extent */
	xfs_agblock_t		tbno;	/* start block of busy extent */
	xfs_extlen_t		tlen;	/* length of busy extent */
	xfs_agblock_t		tend;	/* end block of busy extent */
	int			i;	/* success/failure of operation */
	unsigned		busy_gen;

	/* exact-bno allocations cannot be realigned */
	ASSERT(args->alignment == 1);

	/*
	 * Allocate/initialize a cursor for the by-number freespace btree.
	 */
	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_BNO);

	/*
	 * Lookup bno and minlen in the btree (minlen is irrelevant, really).
	 * Look for the closest free block <= bno, it must contain bno
	 * if any free block does.
	 */
	error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i);
	if (error)
		goto error0;
	if (!i)
		goto not_found;

	/*
	 * Grab the freespace record.
	 */
	error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
	if (error)
		goto error0;
	if (XFS_IS_CORRUPT(args->mp, i != 1)) {
		xfs_btree_mark_sick(bno_cur);
		error = -EFSCORRUPTED;
		goto error0;
	}
	ASSERT(fbno <= args->agbno);

	/*
	 * Check for overlapping busy extents.
	 */
	tbno = fbno;
	tlen = flen;
	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);

	/*
	 * Give up if the start of the extent is busy, or the freespace isn't
	 * long enough for the minimum request.
	 */
	if (tbno > args->agbno)
		goto not_found;
	if (tlen < args->minlen)
		goto not_found;
	tend = tbno + tlen;
	if (tend < args->agbno + args->minlen)
		goto not_found;

	/*
	 * End of extent will be smaller of the freespace end and the
	 * maximal requested end.
	 *
	 * Fix the length according to mod and prod if given.
	 */
	args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen)
						- args->agbno;
	xfs_alloc_fix_len(args);
	ASSERT(args->agbno + args->len <= tend);

	/*
	 * We are allocating agbno for args->len
	 * Allocate/initialize a cursor for the by-size btree.
	 */
	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
					  args->pag, XFS_BTNUM_CNT);
	ASSERT(args->agbno + args->len <= be32_to_cpu(agf->agf_length));
	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
				      args->len, XFSA_FIXUP_BNO_OK);
	if (error) {
		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
		goto error0;
	}

	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);

	args->wasfromfl = 0;
	trace_xfs_alloc_exact_done(args);
	return 0;

not_found:
	/* Didn't find it, return null. */
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
	args->agbno = NULLAGBLOCK;
	trace_xfs_alloc_exact_notfound(args);
	return 0;

error0:
	xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
	trace_xfs_alloc_exact_error(args);
	return error;
}
1333 :
/*
 * Search a given number of btree records in a given direction. Check each
 * record against the good extent we've already found.
 *
 * @increment: walk forward (true) or backward (false) through the tree.
 * @find_one: stop as soon as one record improves the candidate.
 * @count: maximum number of records to visit, or -1 for no limit.
 * @stat: set to 1 if at least one record became the new best candidate.
 */
STATIC int
xfs_alloc_walk_iter(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	struct xfs_btree_cur	*cur,
	bool			increment,
	bool			find_one, /* quit on first candidate */
	int			count,    /* rec count (-1 for infinite) */
	int			*stat)
{
	int			error;
	int			i;

	*stat = 0;

	/*
	 * Search so long as the cursor is active or we find a better extent.
	 * The cursor is deactivated if it extends beyond the range of the
	 * current allocation candidate.
	 */
	while (xfs_alloc_cur_active(cur) && count) {
		error = xfs_alloc_cur_check(args, acur, cur, &i);
		if (error)
			return error;
		if (i == 1) {
			*stat = 1;
			if (find_one)
				break;
		}
		/* the check above may have deactivated the cursor */
		if (!xfs_alloc_cur_active(cur))
			break;

		if (increment)
			error = xfs_btree_increment(cur, 0, &i);
		else
			error = xfs_btree_decrement(cur, 0, &i);
		if (error)
			return error;
		/* i == 0 means we walked off the edge of the tree */
		if (i == 0)
			cur->bc_ag.abt.active = false;

		/* count < 0 means unlimited, so only decrement when bounded */
		if (count > 0)
			count--;
	}

	return 0;
}
1385 :
/*
 * Search the by-bno and by-size btrees in parallel in search of an extent with
 * ideal locality based on the NEAR mode ->agbno locality hint.
 *
 * *stat is set to 1 if an allocation candidate was found in acur. If nothing
 * was found but busy extents were seen (acur->busy), the caller is expected
 * to flush busy extents and retry.
 */
STATIC int
xfs_alloc_ag_vextent_locality(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	int			*stat)
{
	struct xfs_btree_cur	*fbcur = NULL;
	int			error;
	int			i;
	/*
	 * Direction for the final sweep; only meaningful when fbcur is
	 * non-NULL, and every assignment of fbcur below also sets fbinc.
	 */
	bool			fbinc;

	ASSERT(acur->len == 0);

	*stat = 0;

	/* position all three cursors relative to the locality hint */
	error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
	if (error)
		return error;
	error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
	if (error)
		return error;

	/*
	 * Search the bnobt and cntbt in parallel. Search the bnobt left and
	 * right and lookup the closest extent to the locality hint for each
	 * extent size key in the cntbt. The entire search terminates
	 * immediately on a bnobt hit because that means we've found best case
	 * locality. Otherwise the search continues until the cntbt cursor runs
	 * off the end of the tree. If no allocation candidate is found at this
	 * point, give up on locality, walk backwards from the end of the cntbt
	 * and take the first available extent.
	 *
	 * The parallel tree searches balance each other out to provide fairly
	 * consistent performance for various situations. The bnobt search can
	 * have pathological behavior in the worst case scenario of larger
	 * allocation requests and fragmented free space. On the other hand, the
	 * bnobt is able to satisfy most smaller allocation requests much more
	 * quickly than the cntbt. The cntbt search can sift through fragmented
	 * free space and sets of free extents for larger allocation requests
	 * more quickly than the bnobt. Since the locality hint is just a hint
	 * and we don't want to scan the entire bnobt for perfect locality, the
	 * cntbt search essentially bounds the bnobt search such that we can
	 * find good enough locality at reasonable performance in most cases.
	 */
	while (xfs_alloc_cur_active(acur->bnolt) ||
	       xfs_alloc_cur_active(acur->bnogt) ||
	       xfs_alloc_cur_active(acur->cnt)) {

		trace_xfs_alloc_cur_lookup(args);

		/*
		 * Search the bnobt left and right. In the case of a hit, finish
		 * the search in the opposite direction and we're done.
		 */
		error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
					    true, 1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_left(args);
			fbcur = acur->bnogt;
			fbinc = true;
			break;
		}
		error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
					    1, &i);
		if (error)
			return error;
		if (i == 1) {
			trace_xfs_alloc_cur_right(args);
			fbcur = acur->bnolt;
			fbinc = false;
			break;
		}

		/*
		 * Check the extent with best locality based on the current
		 * extent size search key and keep track of the best candidate.
		 */
		error = xfs_alloc_cntbt_iter(args, acur);
		if (error)
			return error;
		if (!xfs_alloc_cur_active(acur->cnt)) {
			trace_xfs_alloc_cur_lookup_done(args);
			break;
		}
	}

	/*
	 * If we failed to find anything due to busy extents, return empty
	 * handed so the caller can flush and retry. If no busy extents were
	 * found, walk backwards from the end of the cntbt as a last resort.
	 */
	if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
		error = xfs_btree_decrement(acur->cnt, 0, &i);
		if (error)
			return error;
		if (i) {
			/* reactivate the cursor for the last-resort sweep */
			acur->cnt->bc_ag.abt.active = true;
			fbcur = acur->cnt;
			fbinc = false;
		}
	}

	/*
	 * Search in the opposite direction for a better entry in the case of
	 * a bnobt hit or walk backwards from the end of the cntbt.
	 */
	if (fbcur) {
		error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
					    &i);
		if (error)
			return error;
	}

	if (acur->len)
		*stat = 1;

	return 0;
}
1513 :
/*
 * Check the last block of the cnt btree for allocations.
 *
 * Walks the rightmost (largest extents) cntbt leaf looking for candidates.
 * *allocated is set true when a candidate was recorded in @acur; the caller
 * then finishes the allocation. *bno and *len are scratch for record walking.
 */
static int
xfs_alloc_ag_vextent_lastblock(
	struct xfs_alloc_arg	*args,
	struct xfs_alloc_cur	*acur,
	xfs_agblock_t		*bno,
	xfs_extlen_t		*len,
	bool			*allocated)
{
	int			error;
	int			i;

#ifdef DEBUG
	/* Randomly don't execute the first algorithm. */
	if (get_random_u32_below(2))
		return 0;
#endif

	/*
	 * Start from the entry that lookup found, sequence through all larger
	 * free blocks. If we're actually pointing at a record smaller than
	 * maxlen, go to the start of this block, and skip all those smaller
	 * than minlen.
	 */
	if (*len || args->alignment > 1) {
		/* rewind to the first record of the leaf block */
		acur->cnt->bc_levels[0].ptr = 1;
		do {
			error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
			if (error)
				return error;
			if (XFS_IS_CORRUPT(args->mp, i != 1)) {
				xfs_btree_mark_sick(acur->cnt);
				return -EFSCORRUPTED;
			}
			if (*len >= args->minlen)
				break;
			error = xfs_btree_increment(acur->cnt, 0, &i);
			if (error)
				return error;
		} while (i);
		ASSERT(*len >= args->minlen);
		if (!i)
			return 0;
	}

	/* examine all remaining records in the block for the best candidate */
	error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
	if (error)
		return error;

	/*
	 * It didn't work. We COULD be in a case where there's a good record
	 * somewhere, so try again.
	 */
	if (acur->len == 0)
		return 0;

	trace_xfs_alloc_near_first(args);
	*allocated = true;
	return 0;
}
1574 :
/*
 * Allocate a variable extent near bno in the allocation group agno.
 * Extent's length (returned in len) will be between minlen and maxlen,
 * and of the form k * prod + mod unless there's nothing that large.
 * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
 */
STATIC int
xfs_alloc_ag_vextent_near(
	struct xfs_alloc_arg	*args,
	uint32_t		alloc_flags)
{
	struct xfs_alloc_cur	acur = {};
	int			error;		/* error code */
	int			i;		/* result code, temporary */
	xfs_agblock_t		bno;
	xfs_extlen_t		len;

	/* handle uninitialized agbno range so caller doesn't have to */
	if (!args->min_agbno && !args->max_agbno)
		args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
	ASSERT(args->min_agbno <= args->max_agbno);

	/* clamp agbno to the range if it's outside */
	if (args->agbno < args->min_agbno)
		args->agbno = args->min_agbno;
	if (args->agbno > args->max_agbno)
		args->agbno = args->max_agbno;

	/* Retry once quickly if we find busy extents before blocking. */
	alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
restart:
	len = 0;

	/*
	 * Set up cursors and see if there are any free extents as big as
	 * maxlen. If not, pick the last entry in the tree unless the tree is
	 * empty.
	 */
	error = xfs_alloc_cur_setup(args, &acur);
	if (error == -ENOSPC) {
		/* no maxlen extent; settle for the largest remaining one */
		error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
				&len, &i);
		if (error)
			goto out;
		if (i == 0 || len == 0) {
			trace_xfs_alloc_near_noentry(args);
			goto out;
		}
		ASSERT(i == 1);
	} else if (error) {
		goto out;
	}

	/*
	 * First algorithm.
	 * If the requested extent is large wrt the freespaces available
	 * in this a.g., then the cursor will be pointing to a btree entry
	 * near the right edge of the tree. If it's in the last btree leaf
	 * block, then we just examine all the entries in that block
	 * that are big enough, and pick the best one.
	 */
	if (xfs_btree_islastblock(acur.cnt, 0)) {
		bool		allocated = false;

		error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
				&allocated);
		if (error)
			goto out;
		if (allocated)
			goto alloc_finish;
	}

	/*
	 * Second algorithm. Combined cntbt and bnobt search to find ideal
	 * locality.
	 */
	error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
	if (error)
		goto out;

	/*
	 * If we couldn't get anything, give up.
	 */
	if (!acur.len) {
		if (acur.busy) {
			/*
			 * Our only valid extents must have been busy. Flush and
			 * retry the allocation again. If we get an -EAGAIN
			 * error, we're being told that a deadlock was avoided
			 * and the current transaction needs committing before
			 * the allocation can be retried.
			 */
			trace_xfs_alloc_near_busy(args);
			error = xfs_extent_busy_flush(args->tp, args->pag,
					acur.busy_gen, alloc_flags);
			if (error)
				goto out;

			/* second attempt blocks on busy extents */
			alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
			goto restart;
		}
		trace_xfs_alloc_size_neither(args);
		args->agbno = NULLAGBLOCK;
		goto out;
	}

alloc_finish:
	/* fix up btrees on a successful allocation */
	error = xfs_alloc_cur_finish(args, &acur);

out:
	xfs_alloc_cur_close(&acur, error);
	return error;
}
1689 :
1690 : /*
1691 : * Allocate a variable extent anywhere in the allocation group agno.
1692 : * Extent's length (returned in len) will be between minlen and maxlen,
1693 : * and of the form k * prod + mod unless there's nothing that large.
1694 : * Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
1695 : */
1696 : static int
1697 6754131 : xfs_alloc_ag_vextent_size(
1698 : struct xfs_alloc_arg *args,
1699 : uint32_t alloc_flags)
1700 : {
1701 6754131 : struct xfs_agf *agf = args->agbp->b_addr;
1702 6754131 : struct xfs_btree_cur *bno_cur;
1703 6754131 : struct xfs_btree_cur *cnt_cur;
1704 6754131 : xfs_agblock_t fbno; /* start of found freespace */
1705 6754131 : xfs_extlen_t flen; /* length of found freespace */
1706 6754131 : xfs_agblock_t rbno; /* returned block number */
1707 6754131 : xfs_extlen_t rlen; /* length of returned extent */
1708 6754131 : bool busy;
1709 6754131 : unsigned busy_gen;
1710 6754131 : int error;
1711 6754131 : int i;
1712 :
1713 : /* Retry once quickly if we find busy extents before blocking. */
1714 6754131 : alloc_flags |= XFS_ALLOC_FLAG_TRYFLUSH;
1715 : restart:
1716 : /*
1717 : * Allocate and initialize a cursor for the by-size btree.
1718 : */
1719 6792646 : cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1720 : args->pag, XFS_BTNUM_CNT);
1721 6793267 : bno_cur = NULL;
1722 :
1723 : /*
1724 : * Look for an entry >= maxlen+alignment-1 blocks.
1725 : */
1726 6793317 : if ((error = xfs_alloc_lookup_ge(cnt_cur, 0,
1727 6793267 : args->maxlen + args->alignment - 1, &i)))
1728 90 : goto error0;
1729 :
1730 : /*
1731 : * If none then we have to settle for a smaller extent. In the case that
1732 : * there are no large extents, this will return the last entry in the
1733 : * tree unless the tree is empty. In the case that there are only busy
1734 : * large extents, this will return the largest small extent unless there
1735 : * are no smaller extents available.
1736 : */
1737 6793227 : if (!i) {
1738 292464 : error = xfs_alloc_ag_vextent_small(args, cnt_cur,
1739 : &fbno, &flen, &i);
1740 292463 : if (error)
1741 0 : goto error0;
1742 292463 : if (i == 0 || flen == 0) {
1743 0 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1744 0 : trace_xfs_alloc_size_noentry(args);
1745 0 : return 0;
1746 : }
1747 292463 : ASSERT(i == 1);
1748 292463 : busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
1749 : &rlen, &busy_gen);
1750 : } else {
1751 : /*
1752 : * Search for a non-busy extent that is large enough.
1753 : */
1754 1261306675 : for (;;) {
1755 633903719 : error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
1756 633603437 : if (error)
1757 0 : goto error0;
1758 633603437 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1759 0 : xfs_btree_mark_sick(cnt_cur);
1760 0 : error = -EFSCORRUPTED;
1761 0 : goto error0;
1762 : }
1763 :
1764 633603437 : busy = xfs_alloc_compute_aligned(args, fbno, flen,
1765 : &rbno, &rlen, &busy_gen);
1766 :
1767 634266600 : if (rlen >= args->maxlen)
1768 : break;
1769 :
1770 627802655 : error = xfs_btree_increment(cnt_cur, 0, &i);
1771 627440086 : if (error)
1772 0 : goto error0;
1773 627440086 : if (i)
1774 627402956 : continue;
1775 :
1776 : /*
1777 : * Our only valid extents must have been busy. Flush and
1778 : * retry the allocation again. If we get an -EAGAIN
1779 : * error, we're being told that a deadlock was avoided
1780 : * and the current transaction needs committing before
1781 : * the allocation can be retried.
1782 : */
1783 37130 : trace_xfs_alloc_size_busy(args);
1784 37130 : error = xfs_extent_busy_flush(args->tp, args->pag,
1785 : busy_gen, alloc_flags);
1786 37128 : if (error)
1787 1 : goto error0;
1788 :
1789 37127 : alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
1790 37127 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1791 37129 : goto restart;
1792 : }
1793 : }
1794 :
1795 : /*
1796 : * In the first case above, we got the last entry in the
1797 : * by-size btree. Now we check to see if the space hits maxlen
1798 : * once aligned; if not, we search left for something better.
1799 : * This can't happen in the second case above.
1800 : */
1801 6756117 : rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1802 6756117 : if (XFS_IS_CORRUPT(args->mp,
1803 : rlen != 0 &&
1804 : (rlen > flen ||
1805 : rbno + rlen > fbno + flen))) {
1806 0 : xfs_btree_mark_sick(cnt_cur);
1807 0 : error = -EFSCORRUPTED;
1808 0 : goto error0;
1809 : }
1810 6756117 : if (rlen < args->maxlen) {
1811 292458 : xfs_agblock_t bestfbno;
1812 292458 : xfs_extlen_t bestflen;
1813 292458 : xfs_agblock_t bestrbno;
1814 292458 : xfs_extlen_t bestrlen;
1815 :
1816 292458 : bestrlen = rlen;
1817 292458 : bestrbno = rbno;
1818 292458 : bestflen = flen;
1819 292458 : bestfbno = fbno;
1820 86443550 : for (;;) {
1821 86443550 : if ((error = xfs_btree_decrement(cnt_cur, 0, &i)))
1822 0 : goto error0;
1823 86443549 : if (i == 0)
1824 : break;
1825 86440415 : if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
1826 : &i)))
1827 0 : goto error0;
1828 86440407 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1829 0 : xfs_btree_mark_sick(cnt_cur);
1830 0 : error = -EFSCORRUPTED;
1831 0 : goto error0;
1832 : }
1833 86440407 : if (flen < bestrlen)
1834 : break;
1835 86151083 : busy = xfs_alloc_compute_aligned(args, fbno, flen,
1836 : &rbno, &rlen, &busy_gen);
1837 86151093 : rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
1838 86151093 : if (XFS_IS_CORRUPT(args->mp,
1839 : rlen != 0 &&
1840 : (rlen > flen ||
1841 : rbno + rlen > fbno + flen))) {
1842 0 : xfs_btree_mark_sick(cnt_cur);
1843 0 : error = -EFSCORRUPTED;
1844 0 : goto error0;
1845 : }
1846 86151093 : if (rlen > bestrlen) {
1847 160466 : bestrlen = rlen;
1848 160466 : bestrbno = rbno;
1849 160466 : bestflen = flen;
1850 160466 : bestfbno = fbno;
1851 160466 : if (rlen == args->maxlen)
1852 : break;
1853 : }
1854 : }
1855 292459 : if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
1856 : &i)))
1857 0 : goto error0;
1858 292458 : if (XFS_IS_CORRUPT(args->mp, i != 1)) {
1859 0 : xfs_btree_mark_sick(cnt_cur);
1860 0 : error = -EFSCORRUPTED;
1861 0 : goto error0;
1862 : }
1863 292458 : rlen = bestrlen;
1864 292458 : rbno = bestrbno;
1865 292458 : flen = bestflen;
1866 292458 : fbno = bestfbno;
1867 : }
1868 6756117 : args->wasfromfl = 0;
1869 : /*
1870 : * Fix up the length.
1871 : */
1872 6756117 : args->len = rlen;
1873 6756117 : if (rlen < args->minlen) {
1874 157183 : if (busy) {
1875 : /*
1876 : * Our only valid extents must have been busy. Flush and
1877 : * retry the allocation again. If we get an -EAGAIN
1878 : * error, we're being told that a deadlock was avoided
1879 : * and the current transaction needs committing before
1880 : * the allocation can be retried.
1881 : */
1882 1386 : trace_xfs_alloc_size_busy(args);
1883 1386 : error = xfs_extent_busy_flush(args->tp, args->pag,
1884 : busy_gen, alloc_flags);
1885 1386 : if (error)
1886 0 : goto error0;
1887 :
1888 1386 : alloc_flags &= ~XFS_ALLOC_FLAG_TRYFLUSH;
1889 1386 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1890 1386 : goto restart;
1891 : }
1892 155797 : goto out_nominleft;
1893 : }
1894 6598934 : xfs_alloc_fix_len(args);
1895 :
1896 6598357 : rlen = args->len;
1897 6598357 : if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
1898 0 : xfs_btree_mark_sick(cnt_cur);
1899 0 : error = -EFSCORRUPTED;
1900 0 : goto error0;
1901 : }
1902 : /*
1903 : * Allocate and initialize a cursor for the by-block tree.
1904 : */
1905 6598357 : bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
1906 : args->pag, XFS_BTNUM_BNO);
1907 6599573 : if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
1908 : rbno, rlen, XFSA_FIXUP_CNT_OK)))
1909 104 : goto error0;
1910 6598555 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1911 6599316 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
1912 6599710 : cnt_cur = bno_cur = NULL;
1913 6599710 : args->len = rlen;
1914 6599710 : args->agbno = rbno;
1915 6599710 : if (XFS_IS_CORRUPT(args->mp,
1916 : args->agbno + args->len >
1917 : be32_to_cpu(agf->agf_length))) {
1918 0 : xfs_ag_mark_sick(args->pag, XFS_SICK_AG_BNOBT);
1919 0 : error = -EFSCORRUPTED;
1920 0 : goto error0;
1921 : }
1922 6599710 : trace_xfs_alloc_size_done(args);
1923 6599710 : return 0;
1924 :
1925 195 : error0:
1926 195 : trace_xfs_alloc_size_error(args);
1927 195 : if (cnt_cur)
1928 195 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
1929 195 : if (bno_cur)
1930 104 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
1931 : return error;
1932 :
1933 : out_nominleft:
1934 155797 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
1935 155797 : trace_xfs_alloc_size_nominleft(args);
1936 155797 : args->agbno = NULLAGBLOCK;
1937 155797 : return 0;
1938 : }
1939 :
1940 : /*
1941 : * Free the extent starting at agno/bno for length.
1942 : */
1943 : STATIC int
1944 98811282 : xfs_free_ag_extent(
1945 : struct xfs_trans *tp,
1946 : struct xfs_buf *agbp,
1947 : xfs_agnumber_t agno,
1948 : xfs_agblock_t bno,
1949 : xfs_extlen_t len,
1950 : const struct xfs_owner_info *oinfo,
1951 : enum xfs_ag_resv_type type)
1952 : {
1953 98811282 : struct xfs_mount *mp;
1954 98811282 : struct xfs_btree_cur *bno_cur;
1955 98811282 : struct xfs_btree_cur *cnt_cur;
1956 98811282 : xfs_agblock_t gtbno; /* start of right neighbor */
1957 98811282 : xfs_extlen_t gtlen; /* length of right neighbor */
1958 98811282 : xfs_agblock_t ltbno; /* start of left neighbor */
1959 98811282 : xfs_extlen_t ltlen; /* length of left neighbor */
1960 98811282 : xfs_agblock_t nbno; /* new starting block of freesp */
1961 98811282 : xfs_extlen_t nlen; /* new length of freespace */
1962 98811282 : int haveleft; /* have a left neighbor */
1963 98811282 : int haveright; /* have a right neighbor */
1964 98811282 : int i;
1965 98811282 : int error;
1966 98811282 : struct xfs_perag *pag = agbp->b_pag;
1967 :
1968 98811282 : bno_cur = cnt_cur = NULL;
1969 98811282 : mp = tp->t_mountp;
1970 :
1971 98811282 : if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1972 3057448 : error = xfs_rmap_free(tp, agbp, pag, bno, len, oinfo);
1973 3057731 : if (error)
1974 26 : goto error0;
1975 : }
1976 :
1977 : /*
1978 : * Allocate and initialize a cursor for the by-block btree.
1979 : */
1980 98811539 : bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_BNO);
1981 : /*
1982 : * Look for a neighboring block on the left (lower block numbers)
1983 : * that is contiguous with this space.
1984 : */
1985 98816437 : if ((error = xfs_alloc_lookup_le(bno_cur, bno, len, &haveleft)))
1986 157 : goto error0;
1987 98817919 : if (haveleft) {
1988 : /*
1989 : * There is a block to our left.
1990 : */
1991 97081898 : if ((error = xfs_alloc_get_rec(bno_cur, <bno, <len, &i)))
1992 0 : goto error0;
1993 97075041 : if (XFS_IS_CORRUPT(mp, i != 1)) {
1994 0 : xfs_btree_mark_sick(bno_cur);
1995 0 : error = -EFSCORRUPTED;
1996 0 : goto error0;
1997 : }
1998 : /*
1999 : * It's not contiguous, though.
2000 : */
2001 97075041 : if (ltbno + ltlen < bno)
2002 71097589 : haveleft = 0;
2003 : else {
2004 : /*
2005 : * If this failure happens the request to free this
2006 : * space was invalid, it's (partly) already free.
2007 : * Very bad.
2008 : */
2009 25977452 : if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
2010 0 : xfs_btree_mark_sick(bno_cur);
2011 0 : error = -EFSCORRUPTED;
2012 0 : goto error0;
2013 : }
2014 : }
2015 : }
2016 : /*
2017 : * Look for a neighboring block on the right (higher block numbers)
2018 : * that is contiguous with this space.
2019 : */
2020 98811062 : if ((error = xfs_btree_increment(bno_cur, 0, &haveright)))
2021 0 : goto error0;
2022 98811949 : if (haveright) {
2023 : /*
2024 : * There is a block to our right.
2025 : */
2026 97524350 : if ((error = xfs_alloc_get_rec(bno_cur, >bno, >len, &i)))
2027 0 : goto error0;
2028 97530846 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2029 0 : xfs_btree_mark_sick(bno_cur);
2030 0 : error = -EFSCORRUPTED;
2031 0 : goto error0;
2032 : }
2033 : /*
2034 : * It's not contiguous, though.
2035 : */
2036 97530846 : if (bno + len < gtbno)
2037 65980712 : haveright = 0;
2038 : else {
2039 : /*
2040 : * If this failure happens the request to free this
2041 : * space was invalid, it's (partly) already free.
2042 : * Very bad.
2043 : */
2044 31550134 : if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
2045 0 : xfs_btree_mark_sick(bno_cur);
2046 0 : error = -EFSCORRUPTED;
2047 0 : goto error0;
2048 : }
2049 : }
2050 : }
2051 : /*
2052 : * Now allocate and initialize a cursor for the by-size tree.
2053 : */
2054 98818445 : cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, pag, XFS_BTNUM_CNT);
2055 : /*
2056 : * Have both left and right contiguous neighbors.
2057 : * Merge all three into a single free block.
2058 : */
2059 98820020 : if (haveleft && haveright) {
2060 : /*
2061 : * Delete the old by-size entry on the left.
2062 : */
2063 14172318 : if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2064 20 : goto error0;
2065 14172651 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2066 0 : xfs_btree_mark_sick(cnt_cur);
2067 0 : error = -EFSCORRUPTED;
2068 0 : goto error0;
2069 : }
2070 14172651 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2071 0 : goto error0;
2072 14171051 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2073 0 : xfs_btree_mark_sick(cnt_cur);
2074 0 : error = -EFSCORRUPTED;
2075 0 : goto error0;
2076 : }
2077 : /*
2078 : * Delete the old by-size entry on the right.
2079 : */
2080 14171051 : if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2081 16 : goto error0;
2082 14172480 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2083 0 : xfs_btree_mark_sick(cnt_cur);
2084 0 : error = -EFSCORRUPTED;
2085 0 : goto error0;
2086 : }
2087 14172480 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2088 0 : goto error0;
2089 14172778 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2090 0 : xfs_btree_mark_sick(cnt_cur);
2091 0 : error = -EFSCORRUPTED;
2092 0 : goto error0;
2093 : }
2094 : /*
2095 : * Delete the old by-block entry for the right block.
2096 : */
2097 14172778 : if ((error = xfs_btree_delete(bno_cur, &i)))
2098 0 : goto error0;
2099 14172703 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2100 0 : xfs_btree_mark_sick(bno_cur);
2101 0 : error = -EFSCORRUPTED;
2102 0 : goto error0;
2103 : }
2104 : /*
2105 : * Move the by-block cursor back to the left neighbor.
2106 : */
2107 14172703 : if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2108 0 : goto error0;
2109 14171985 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2110 0 : xfs_btree_mark_sick(bno_cur);
2111 0 : error = -EFSCORRUPTED;
2112 0 : goto error0;
2113 : }
2114 : #ifdef DEBUG
2115 : /*
2116 : * Check that this is the right record: delete didn't
2117 : * mangle the cursor.
2118 : */
2119 : {
2120 14171985 : xfs_agblock_t xxbno;
2121 14171985 : xfs_extlen_t xxlen;
2122 :
2123 14171985 : if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
2124 : &i)))
2125 0 : goto error0;
2126 14172427 : if (XFS_IS_CORRUPT(mp,
2127 : i != 1 ||
2128 : xxbno != ltbno ||
2129 : xxlen != ltlen)) {
2130 0 : xfs_btree_mark_sick(bno_cur);
2131 0 : error = -EFSCORRUPTED;
2132 0 : goto error0;
2133 : }
2134 : }
2135 : #endif
2136 : /*
2137 : * Update remaining by-block entry to the new, joined block.
2138 : */
2139 14172427 : nbno = ltbno;
2140 14172427 : nlen = len + ltlen + gtlen;
2141 14172427 : if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2142 0 : goto error0;
2143 : }
2144 : /*
2145 : * Have only a left contiguous neighbor.
2146 : * Merge it together with the new freespace.
2147 : */
2148 84647702 : else if (haveleft) {
2149 : /*
2150 : * Delete the old by-size entry on the left.
2151 : */
2152 11808113 : if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
2153 6 : goto error0;
2154 11808669 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2155 0 : xfs_btree_mark_sick(cnt_cur);
2156 0 : error = -EFSCORRUPTED;
2157 0 : goto error0;
2158 : }
2159 11808669 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2160 0 : goto error0;
2161 11807568 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2162 0 : xfs_btree_mark_sick(cnt_cur);
2163 0 : error = -EFSCORRUPTED;
2164 0 : goto error0;
2165 : }
2166 : /*
2167 : * Back up the by-block cursor to the left neighbor, and
2168 : * update its length.
2169 : */
2170 11807568 : if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
2171 0 : goto error0;
2172 11807260 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2173 0 : xfs_btree_mark_sick(bno_cur);
2174 0 : error = -EFSCORRUPTED;
2175 0 : goto error0;
2176 : }
2177 11807260 : nbno = ltbno;
2178 11807260 : nlen = len + ltlen;
2179 11807260 : if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2180 0 : goto error0;
2181 : }
2182 : /*
2183 : * Have only a right contiguous neighbor.
2184 : * Merge it together with the new freespace.
2185 : */
2186 72839589 : else if (haveright) {
2187 : /*
2188 : * Delete the old by-size entry on the right.
2189 : */
2190 17378468 : if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
2191 18 : goto error0;
2192 17378663 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2193 0 : xfs_btree_mark_sick(cnt_cur);
2194 0 : error = -EFSCORRUPTED;
2195 0 : goto error0;
2196 : }
2197 17378663 : if ((error = xfs_btree_delete(cnt_cur, &i)))
2198 0 : goto error0;
2199 17377702 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2200 0 : xfs_btree_mark_sick(cnt_cur);
2201 0 : error = -EFSCORRUPTED;
2202 0 : goto error0;
2203 : }
2204 : /*
2205 : * Update the starting block and length of the right
2206 : * neighbor in the by-block tree.
2207 : */
2208 17377702 : nbno = bno;
2209 17377702 : nlen = len + gtlen;
2210 17377702 : if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
2211 0 : goto error0;
2212 : }
2213 : /*
2214 : * No contiguous neighbors.
2215 : * Insert the new freespace into the by-block tree.
2216 : */
2217 : else {
2218 55461121 : nbno = bno;
2219 55461121 : nlen = len;
2220 55461121 : if ((error = xfs_btree_insert(bno_cur, &i)))
2221 1 : goto error0;
2222 55457348 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2223 0 : xfs_btree_mark_sick(bno_cur);
2224 0 : error = -EFSCORRUPTED;
2225 0 : goto error0;
2226 : }
2227 : }
2228 98815960 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
2229 98821789 : bno_cur = NULL;
2230 : /*
2231 : * In all cases we need to insert the new freespace in the by-size tree.
2232 : */
2233 98821789 : if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
2234 40 : goto error0;
2235 98822476 : if (XFS_IS_CORRUPT(mp, i != 0)) {
2236 0 : xfs_btree_mark_sick(cnt_cur);
2237 0 : error = -EFSCORRUPTED;
2238 0 : goto error0;
2239 : }
2240 98822476 : if ((error = xfs_btree_insert(cnt_cur, &i)))
2241 1 : goto error0;
2242 98815852 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2243 0 : xfs_btree_mark_sick(cnt_cur);
2244 0 : error = -EFSCORRUPTED;
2245 0 : goto error0;
2246 : }
2247 98815852 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
2248 98822628 : cnt_cur = NULL;
2249 :
2250 : /*
2251 : * Update the freespace totals in the ag and superblock.
2252 : */
2253 98822628 : error = xfs_alloc_update_counters(tp, agbp, len);
2254 98814214 : xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
2255 98789716 : if (error)
2256 0 : goto error0;
2257 :
2258 98789716 : XFS_STATS_INC(mp, xs_freex);
2259 98804488 : XFS_STATS_ADD(mp, xs_freeb, len);
2260 :
2261 98804130 : trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
2262 :
2263 98804130 : return 0;
2264 :
2265 285 : error0:
2266 285 : trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
2267 285 : if (bno_cur)
2268 218 : xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
2269 285 : if (cnt_cur)
2270 102 : xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
2271 : return error;
2272 : }
2273 :
2274 : /*
2275 : * Visible (exported) allocation/free functions.
2276 : * Some of these are used just by xfs_alloc_btree.c and this file.
2277 : */
2278 :
2279 : /*
2280 : * Compute and fill in value of m_alloc_maxlevels.
2281 : */
2282 : void
2283 60880 : xfs_alloc_compute_maxlevels(
2284 : xfs_mount_t *mp) /* file system mount structure */
2285 : {
2286 121760 : mp->m_alloc_maxlevels = xfs_btree_compute_maxlevels(mp->m_alloc_mnr,
2287 60880 : (mp->m_sb.sb_agblocks + 1) / 2);
2288 60880 : ASSERT(mp->m_alloc_maxlevels <= xfs_allocbt_maxlevels_ondisk());
2289 60880 : }
2290 :
2291 : /*
2292 : * Find the length of the longest extent in an AG. The 'need' parameter
2293 : * specifies how much space we're going to need for the AGFL and the
2294 : * 'reserved' parameter tells us how many blocks in this AG are reserved for
2295 : * other callers.
2296 : */
2297 : xfs_extlen_t
2298 443150571 : xfs_alloc_longest_free_extent(
2299 : struct xfs_perag *pag,
2300 : xfs_extlen_t need,
2301 : xfs_extlen_t reserved)
2302 : {
2303 443150571 : xfs_extlen_t delta = 0;
2304 :
2305 : /*
2306 : * If the AGFL needs a recharge, we'll have to subtract that from the
2307 : * longest extent.
2308 : */
2309 443150571 : if (need > pag->pagf_flcount)
2310 563062 : delta = need - pag->pagf_flcount;
2311 :
2312 : /*
2313 : * If we cannot maintain others' reservations with space from the
2314 : * not-longest freesp extents, we'll have to subtract /that/ from
2315 : * the longest extent too.
2316 : */
2317 443150571 : if (pag->pagf_freeblks - pag->pagf_longest < reserved)
2318 345035444 : delta += reserved - (pag->pagf_freeblks - pag->pagf_longest);
2319 :
2320 : /*
2321 : * If the longest extent is long enough to satisfy all the
2322 : * reservations and AGFL rules in place, we can return this extent.
2323 : */
2324 443150571 : if (pag->pagf_longest > delta)
2325 442818294 : return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
2326 : pag->pagf_longest - delta);
2327 :
2328 : /* Otherwise, let the caller try for 1 block if there's space. */
2329 332277 : return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
2330 : }
2331 :
2332 : /*
2333 : * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
2334 : * return the largest possible minimum length.
2335 : */
2336 : unsigned int
2337 1453426713 : xfs_alloc_min_freelist(
2338 : struct xfs_mount *mp,
2339 : struct xfs_perag *pag)
2340 : {
2341 : /* AG btrees have at least 1 level. */
2342 1453426713 : static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
2343 1453426713 : const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
2344 1453426713 : unsigned int min_free;
2345 :
2346 1453426713 : ASSERT(mp->m_alloc_maxlevels > 0);
2347 :
2348 : /* space needed by-bno freespace btree */
2349 1453426713 : min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
2350 : mp->m_alloc_maxlevels);
2351 : /* space needed by-size freespace btree */
2352 1453426713 : min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
2353 : mp->m_alloc_maxlevels);
2354 : /* space needed reverse mapping used space btree */
2355 1453426713 : if (xfs_has_rmapbt(mp))
2356 1379650659 : min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
2357 : mp->m_rmap_maxlevels);
2358 :
2359 1453426713 : return min_free;
2360 : }
2361 :
2362 : /*
2363 : * Check if the operation we are fixing up the freelist for should go ahead or
2364 : * not. If we are freeing blocks, we always allow it, otherwise the allocation
2365 : * is dependent on whether the size and shape of free space available will
2366 : * permit the requested allocation to take place.
2367 : */
2368 : static bool
2369 1372457279 : xfs_alloc_space_available(
2370 : struct xfs_alloc_arg *args,
2371 : xfs_extlen_t min_free,
2372 : int flags)
2373 : {
2374 1372457279 : struct xfs_perag *pag = args->pag;
2375 1372457279 : xfs_extlen_t alloc_len, longest;
2376 1372457279 : xfs_extlen_t reservation; /* blocks that are still reserved */
2377 1372457279 : int available;
2378 1372457279 : xfs_extlen_t agflcount;
2379 :
2380 1372457279 : if (flags & XFS_ALLOC_FLAG_FREEING)
2381 : return true;
2382 :
2383 362197117 : reservation = xfs_ag_resv_needed(pag, args->resv);
2384 :
2385 : /* do we have enough contiguous free space for the allocation? */
2386 362209034 : alloc_len = args->minlen + (args->alignment - 1) + args->minalignslop;
2387 362209034 : longest = xfs_alloc_longest_free_extent(pag, min_free, reservation);
2388 362209034 : if (longest < alloc_len)
2389 : return false;
2390 :
2391 : /*
2392 : * Do we have enough free space remaining for the allocation? Don't
2393 : * account extra agfl blocks because we are about to defer free them,
2394 : * making them unavailable until the current transaction commits.
2395 : */
2396 286720886 : agflcount = min_t(xfs_extlen_t, pag->pagf_flcount, min_free);
2397 286720886 : available = (int)(pag->pagf_freeblks + agflcount -
2398 286720886 : reservation - min_free - args->minleft);
2399 286720886 : if (available < (int)max(args->total, alloc_len))
2400 : return false;
2401 :
2402 : /*
2403 : * Clamp maxlen to the amount of free space available for the actual
2404 : * extent allocation.
2405 : */
2406 178131138 : if (available < (int)args->maxlen && !(flags & XFS_ALLOC_FLAG_CHECK)) {
2407 414523 : args->maxlen = available;
2408 414523 : ASSERT(args->maxlen > 0);
2409 414523 : ASSERT(args->maxlen >= args->minlen);
2410 : }
2411 :
2412 : return true;
2413 : }
2414 :
2415 : int
2416 266507 : xfs_free_agfl_block(
2417 : struct xfs_trans *tp,
2418 : xfs_agnumber_t agno,
2419 : xfs_agblock_t agbno,
2420 : struct xfs_buf *agbp,
2421 : struct xfs_owner_info *oinfo)
2422 : {
2423 266507 : int error;
2424 266507 : struct xfs_buf *bp;
2425 :
2426 266507 : error = xfs_free_ag_extent(tp, agbp, agno, agbno, 1, oinfo,
2427 : XFS_AG_RESV_AGFL);
2428 266507 : if (error)
2429 : return error;
2430 :
2431 533000 : error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
2432 266500 : XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
2433 266500 : tp->t_mountp->m_bsize, 0, &bp);
2434 266499 : if (error)
2435 : return error;
2436 266499 : xfs_trans_binval(tp, bp);
2437 :
2438 266499 : return 0;
2439 : }
2440 :
2441 : /*
2442 : * Check the agfl fields of the agf for inconsistency or corruption.
2443 : *
2444 : * The original purpose was to detect an agfl header padding mismatch between
2445 : * current and early v5 kernels. This problem manifests as a 1-slot size
2446 : * difference between the on-disk flcount and the active [first, last] range of
2447 : * a wrapped agfl.
2448 : *
2449 : * However, we need to use these same checks to catch agfl count corruptions
2450 : * unrelated to padding. This could occur on any v4 or v5 filesystem, so either
2451 : * way, we need to reset the agfl and warn the user.
2452 : *
2453 : * Return true if a reset is required before the agfl can be used, false
2454 : * otherwise.
2455 : */
2456 : static bool
2457 523791 : xfs_agfl_needs_reset(
2458 : struct xfs_mount *mp,
2459 : struct xfs_agf *agf)
2460 : {
2461 523791 : uint32_t f = be32_to_cpu(agf->agf_flfirst);
2462 523791 : uint32_t l = be32_to_cpu(agf->agf_fllast);
2463 523791 : uint32_t c = be32_to_cpu(agf->agf_flcount);
2464 523791 : int agfl_size = xfs_agfl_size(mp);
2465 523791 : int active;
2466 :
2467 : /*
2468 : * The agf read verifier catches severe corruption of these fields.
2469 : * Repeat some sanity checks to cover a packed -> unpacked mismatch if
2470 : * the verifier allows it.
2471 : */
2472 523791 : if (f >= agfl_size || l >= agfl_size)
2473 : return true;
2474 523791 : if (c > agfl_size)
2475 : return true;
2476 :
2477 : /*
2478 : * Check consistency between the on-disk count and the active range. An
2479 : * agfl padding mismatch manifests as an inconsistent flcount.
2480 : */
2481 523791 : if (c && l >= f)
2482 490770 : active = l - f + 1;
2483 33021 : else if (c)
2484 231 : active = agfl_size - f + l + 1;
2485 : else
2486 : active = 0;
2487 :
2488 523791 : return active != c;
2489 : }
2490 :
/*
 * Reset the agfl to an empty state. Ignore/drop any existing blocks since the
 * agfl content cannot be trusted. Warn the user that a repair is required to
 * recover leaked blocks.
 *
 * The purpose of this mechanism is to handle filesystems affected by the agfl
 * header padding mismatch problem. A reset keeps the filesystem online with a
 * relatively minor free space accounting inconsistency rather than suffer the
 * inevitable crash from use of an invalid agfl block.
 */
static void
xfs_agfl_reset(
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_perag	*pag)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = agbp->b_addr;

	/* Only called once the per-AG state says a reset is required. */
	ASSERT(xfs_perag_agfl_needs_reset(pag));
	trace_xfs_agfl_reset(mp, agf, 0, _RET_IP_);

	xfs_warn(mp,
		"WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
		"Please unmount and run xfs_repair.",
		 pag->pag_agno, pag->pagf_flcount);

	/*
	 * Empty the on-disk AGFL: first slot 0, last slot wrapped to the end
	 * of the list, zero count.  Log all three fields in this transaction.
	 */
	agf->agf_flfirst = 0;
	agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
	agf->agf_flcount = 0;
	xfs_alloc_log_agf(tp, agbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST |
				    XFS_AGF_FLCOUNT);

	/* Bring the in-core per-AG state back in sync with the on-disk AGF. */
	pag->pagf_flcount = 0;
	clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
}
2527 :
2528 : /*
2529 : * Defer an AGFL block free. This is effectively equivalent to
2530 : * xfs_free_extent_later() with some special handling particular to AGFL blocks.
2531 : *
2532 : * Deferring AGFL frees helps prevent log reservation overruns due to too many
2533 : * allocation operations in a transaction. AGFL frees are prone to this problem
2534 : * because for one they are always freed one at a time. Further, an immediate
2535 : * AGFL block free can cause a btree join and require another block free before
2536 : * the real allocation can proceed. Deferring the free disconnects freeing up
2537 : * the AGFL slot from freeing the block.
2538 : */
2539 : static int
2540 266519 : xfs_defer_agfl_block(
2541 : struct xfs_trans *tp,
2542 : xfs_agnumber_t agno,
2543 : xfs_agblock_t agbno,
2544 : struct xfs_owner_info *oinfo)
2545 : {
2546 266519 : struct xfs_mount *mp = tp->t_mountp;
2547 266519 : struct xfs_extent_free_item *xefi;
2548 266519 : xfs_fsblock_t fsbno = XFS_AGB_TO_FSB(mp, agno, agbno);
2549 :
2550 266519 : ASSERT(xfs_extfree_item_cache != NULL);
2551 266519 : ASSERT(oinfo != NULL);
2552 :
2553 266519 : if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, fsbno)))
2554 0 : return -EFSCORRUPTED;
2555 :
2556 266519 : xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
2557 : GFP_KERNEL | __GFP_NOFAIL);
2558 266520 : xefi->xefi_startblock = fsbno;
2559 266520 : xefi->xefi_blockcount = 1;
2560 266520 : xefi->xefi_owner = oinfo->oi_owner;
2561 266520 : xefi->xefi_agresv = XFS_AG_RESV_AGFL;
2562 :
2563 266520 : trace_xfs_agfl_free_defer(mp, agno, 0, agbno, 1);
2564 :
2565 266519 : xfs_extent_free_get_group(mp, xefi);
2566 266520 : xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_AGFL_FREE, &xefi->xefi_list);
2567 266520 : return 0;
2568 : }
2569 :
/*
 * Add the extent to the list of extents to be free at transaction end.
 * The list is maintained sorted (by block number).
 *
 * Queues a deferred free of @len blocks starting at @bno, charged back to the
 * per-AG reservation @type.  @oinfo, when non-NULL, carries the rmap owner;
 * @skip_discard suppresses online discard of the blocks when they are finally
 * freed.  Returns 0 or -EFSCORRUPTED if the extent fails verification.
 */
int
__xfs_free_extent_later(
	struct xfs_trans		*tp,
	xfs_fsblock_t			bno,
	xfs_filblks_t			len,
	const struct xfs_owner_info	*oinfo,
	enum xfs_ag_resv_type		type,
	bool				skip_discard)
{
	struct xfs_extent_free_item	*xefi;
	struct xfs_mount		*mp = tp->t_mountp;
#ifdef DEBUG
	xfs_agnumber_t			agno;
	xfs_agblock_t			agbno;

	/* Debug builds sanity check that the extent lies within one AG. */
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(len > 0);
	ASSERT(len <= XFS_MAX_BMBT_EXTLEN);
	ASSERT(!isnullstartblock(bno));
	agno = XFS_FSB_TO_AGNO(mp, bno);
	agbno = XFS_FSB_TO_AGBNO(mp, bno);
	ASSERT(agno < mp->m_sb.sb_agcount);
	ASSERT(agbno < mp->m_sb.sb_agblocks);
	ASSERT(len < mp->m_sb.sb_agblocks);
	ASSERT(agbno + len <= mp->m_sb.sb_agblocks);
#endif
	ASSERT(xfs_extfree_item_cache != NULL);
	/* AGFL blocks go through xfs_defer_agfl_block(), never this path. */
	ASSERT(type != XFS_AG_RESV_AGFL);

	if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
		return -EFSCORRUPTED;

	/* __GFP_NOFAIL: this allocation cannot be allowed to fail. */
	xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
			GFP_KERNEL | __GFP_NOFAIL);
	xefi->xefi_startblock = bno;
	xefi->xefi_blockcount = (xfs_extlen_t)len;
	xefi->xefi_agresv = type;
	if (skip_discard)
		xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
	if (oinfo) {
		ASSERT(oinfo->oi_offset == 0);

		/* Preserve the fork/bmbt ownership flags for the rmap update. */
		if (oinfo->oi_flags & XFS_OWNER_INFO_ATTR_FORK)
			xefi->xefi_flags |= XFS_EFI_ATTR_FORK;
		if (oinfo->oi_flags & XFS_OWNER_INFO_BMBT_BLOCK)
			xefi->xefi_flags |= XFS_EFI_BMBT_BLOCK;
		xefi->xefi_owner = oinfo->oi_owner;
	} else {
		xefi->xefi_owner = XFS_RMAP_OWN_NULL;
	}
	trace_xfs_bmap_free_defer(mp,
			XFS_FSB_TO_AGNO(tp->t_mountp, bno), 0,
			XFS_FSB_TO_AGBNO(tp->t_mountp, bno), len);

	xfs_extent_free_get_group(mp, xefi);
	xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_FREE, &xefi->xefi_list);
	return 0;
}
2632 :
2633 : #ifdef DEBUG
2634 : /*
2635 : * Check if an AGF has a free extent record whose length is equal to
2636 : * args->minlen.
2637 : */
2638 : STATIC int
2639 2775156 : xfs_exact_minlen_extent_available(
2640 : struct xfs_alloc_arg *args,
2641 : struct xfs_buf *agbp,
2642 : int *stat)
2643 : {
2644 2775156 : struct xfs_btree_cur *cnt_cur;
2645 2775156 : xfs_agblock_t fbno;
2646 2775156 : xfs_extlen_t flen;
2647 2775156 : int error = 0;
2648 :
2649 2775156 : cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, agbp,
2650 : args->pag, XFS_BTNUM_CNT);
2651 2775162 : error = xfs_alloc_lookup_ge(cnt_cur, 0, args->minlen, stat);
2652 2774448 : if (error)
2653 0 : goto out;
2654 :
2655 2774448 : if (*stat == 0) {
2656 0 : xfs_btree_mark_sick(cnt_cur);
2657 0 : error = -EFSCORRUPTED;
2658 0 : goto out;
2659 : }
2660 :
2661 2774448 : error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, stat);
2662 2771784 : if (error)
2663 0 : goto out;
2664 :
2665 2771784 : if (*stat == 1 && flen != args->minlen)
2666 39326 : *stat = 0;
2667 :
2668 2732458 : out:
2669 2771784 : xfs_btree_del_cursor(cnt_cur, error);
2670 :
2671 2774301 : return error;
2672 : }
2673 : #endif
2674 :
2675 : /*
2676 : * Decide whether to use this allocation group for this allocation.
2677 : * If so, fix up the btree freelist's size.
2678 : */
int			/* error */
xfs_alloc_fix_freelist(
	struct xfs_alloc_arg	*args,	/* allocation argument structure */
	uint32_t		alloc_flags)
{
	struct xfs_mount	*mp = args->mp;
	struct xfs_perag	*pag = args->pag;
	struct xfs_trans	*tp = args->tp;
	struct xfs_buf		*agbp = NULL;
	struct xfs_buf		*agflbp = NULL;
	struct xfs_alloc_arg	targs;	/* local allocation arguments */
	xfs_agblock_t		bno;	/* freelist block */
	xfs_extlen_t		need;	/* total blocks needed in freelist */
	int			error = 0;

	/* deferred ops (AGFL block frees) require permanent transactions */
	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);

	/*
	 * Read the AGF to initialise the perag if that hasn't been done yet.
	 * This can fail with -EAGAIN when trylocking; treat that as "skip
	 * this AG" rather than a hard error.
	 */
	if (!xfs_perag_initialised_agf(pag)) {
		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/*
	 * If this is a metadata preferred pag and we are user data then try
	 * somewhere else if we are not being asked to try harder at this
	 * point
	 */
	if (xfs_perag_prefers_metadata(pag) &&
	    (args->datatype & XFS_ALLOC_USERDATA) &&
	    (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)) {
		ASSERT(!(alloc_flags & XFS_ALLOC_FLAG_FREEING));
		goto out_agbp_relse;
	}

	/*
	 * First space check uses only the in-core perag counters (CHECK flag)
	 * so we can reject the AG without having read/locked the AGF buffer.
	 */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, alloc_flags |
			XFS_ALLOC_FLAG_CHECK))
		goto out_agbp_relse;

	/*
	 * Get the a.g. freespace buffer.
	 * Can fail if we're not blocking on locks, and it's held.
	 */
	if (!agbp) {
		error = xfs_alloc_read_agf(pag, tp, alloc_flags, &agbp);
		if (error) {
			/* Couldn't lock the AGF so skip this AG. */
			if (error == -EAGAIN)
				error = 0;
			goto out_no_agbp;
		}
	}

	/* reset a padding mismatched agfl before final free space check */
	if (xfs_perag_agfl_needs_reset(pag))
		xfs_agfl_reset(tp, agbp, pag);

	/* If there isn't enough total space or single-extent, reject it. */
	need = xfs_alloc_min_freelist(mp, pag);
	if (!xfs_alloc_space_available(args, need, alloc_flags))
		goto out_agbp_relse;

#ifdef DEBUG
	if (args->alloc_minlen_only) {
		int stat;

		error = xfs_exact_minlen_extent_available(args, agbp, &stat);
		if (error || !stat)
			goto out_agbp_relse;
	}
#endif
	/*
	 * Make the freelist shorter if it's too long.
	 *
	 * Note that from this point onwards, we will always release the agf and
	 * agfl buffers on error. This handles the case where we error out and
	 * the buffers are clean or may not have been joined to the transaction
	 * and hence need to be released manually. If they have been joined to
	 * the transaction, then xfs_trans_brelse() will handle them
	 * appropriately based on the recursion count and dirty state of the
	 * buffer.
	 *
	 * XXX (dgc): When we have lots of free space, does this buy us
	 * anything other than extra overhead when we need to put more blocks
	 * back on the free list? Maybe we should only do this when space is
	 * getting low or the AGFL is more than half full?
	 *
	 * The NOSHRINK flag prevents the AGFL from being shrunk if it's too
	 * big; the NORMAP flag prevents AGFL expand/shrink operations from
	 * updating the rmapbt. Both flags are used in xfs_repair while we're
	 * rebuilding the rmapbt, and neither are used by the kernel. They're
	 * both required to ensure that rmaps are correctly recorded for the
	 * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and
	 * repair/rmap.c in xfsprogs for details.
	 */
	memset(&targs, 0, sizeof(targs));
	/* struct copy below */
	if (alloc_flags & XFS_ALLOC_FLAG_NORMAP)
		targs.oinfo = XFS_RMAP_OINFO_SKIP_UPDATE;
	else
		targs.oinfo = XFS_RMAP_OINFO_AG;
	while (!(alloc_flags & XFS_ALLOC_FLAG_NOSHRINK) &&
	       pag->pagf_flcount > need) {
		error = xfs_alloc_get_freelist(pag, tp, agbp, &bno, 0);
		if (error)
			goto out_agbp_relse;

		/* defer agfl frees */
		error = xfs_defer_agfl_block(tp, args->agno, bno, &targs.oinfo);
		if (error)
			goto out_agbp_relse;
	}

	/*
	 * Set up the local allocation args used to refill the AGFL below.
	 * minlen == 1 so any free extent can contribute blocks.
	 */
	targs.tp = tp;
	targs.mp = mp;
	targs.agbp = agbp;
	targs.agno = args->agno;
	targs.alignment = targs.minlen = targs.prod = 1;
	targs.pag = pag;
	error = xfs_alloc_read_agfl(pag, tp, &agflbp);
	if (error)
		goto out_agbp_relse;

	/* Make the freelist longer if it's too short. */
	while (pag->pagf_flcount < need) {
		targs.agbno = 0;
		targs.maxlen = need - pag->pagf_flcount;
		targs.resv = XFS_AG_RESV_AGFL;

		/* Allocate as many blocks as possible at once. */
		error = xfs_alloc_ag_vextent_size(&targs, alloc_flags);
		if (error)
			goto out_agflbp_relse;

		/*
		 * Stop if we run out.  Won't happen if callers are obeying
		 * the restrictions correctly.  Can happen for free calls
		 * on a completely full ag.
		 */
		if (targs.agbno == NULLAGBLOCK) {
			if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
				break;
			goto out_agflbp_relse;
		}

		/* Record an AG-owned rmap for the new AGFL blocks. */
		if (!xfs_rmap_should_skip_owner_update(&targs.oinfo)) {
			error = xfs_rmap_alloc(tp, agbp, pag,
				       targs.agbno, targs.len, &targs.oinfo);
			if (error)
				goto out_agflbp_relse;
		}
		error = xfs_alloc_update_counters(tp, agbp,
						  -((long)(targs.len)));
		if (error)
			goto out_agflbp_relse;

		/*
		 * Put each allocated block on the list.
		 */
		for (bno = targs.agbno; bno < targs.agbno + targs.len; bno++) {
			error = xfs_alloc_put_freelist(pag, tp, agbp,
							agflbp, bno, 0);
			if (error)
				goto out_agflbp_relse;
		}
	}
	xfs_trans_brelse(tp, agflbp);
	args->agbp = agbp;
	return 0;

out_agflbp_relse:
	xfs_trans_brelse(tp, agflbp);
out_agbp_relse:
	if (agbp)
		xfs_trans_brelse(tp, agbp);
out_no_agbp:
	args->agbp = NULL;
	return error;
}
2864 :
2865 : /*
2866 : * Get a block from the freelist.
2867 : * Returns with the buffer for the block gotten.
2868 : */
2869 : int
2870 1125368 : xfs_alloc_get_freelist(
2871 : struct xfs_perag *pag,
2872 : struct xfs_trans *tp,
2873 : struct xfs_buf *agbp,
2874 : xfs_agblock_t *bnop,
2875 : int btreeblk)
2876 : {
2877 1125368 : struct xfs_agf *agf = agbp->b_addr;
2878 1125368 : struct xfs_buf *agflbp;
2879 1125368 : xfs_agblock_t bno;
2880 1125368 : __be32 *agfl_bno;
2881 1125368 : int error;
2882 1125368 : uint32_t logflags;
2883 1125368 : struct xfs_mount *mp = tp->t_mountp;
2884 :
2885 : /*
2886 : * Freelist is empty, give up.
2887 : */
2888 1125368 : if (!agf->agf_flcount) {
2889 0 : *bnop = NULLAGBLOCK;
2890 0 : return 0;
2891 : }
2892 : /*
2893 : * Read the array of free blocks.
2894 : */
2895 1125368 : error = xfs_alloc_read_agfl(pag, tp, &agflbp);
2896 1125367 : if (error)
2897 : return error;
2898 :
2899 :
2900 : /*
2901 : * Get the block number and update the data structures.
2902 : */
2903 1125367 : agfl_bno = xfs_buf_to_agfl_bno(agflbp);
2904 1125367 : bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]);
2905 2250735 : if (XFS_IS_CORRUPT(tp->t_mountp, !xfs_verify_agbno(pag, bno)))
2906 0 : return -EFSCORRUPTED;
2907 :
2908 1125367 : be32_add_cpu(&agf->agf_flfirst, 1);
2909 1125367 : xfs_trans_brelse(tp, agflbp);
2910 2250732 : if (be32_to_cpu(agf->agf_flfirst) == xfs_agfl_size(mp))
2911 738 : agf->agf_flfirst = 0;
2912 :
2913 2250732 : ASSERT(!xfs_perag_agfl_needs_reset(pag));
2914 1125366 : be32_add_cpu(&agf->agf_flcount, -1);
2915 1125366 : pag->pagf_flcount--;
2916 :
2917 1125366 : logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
2918 1125366 : if (btreeblk) {
2919 858847 : be32_add_cpu(&agf->agf_btreeblks, 1);
2920 858847 : pag->pagf_btreeblks++;
2921 858847 : logflags |= XFS_AGF_BTREEBLKS;
2922 : }
2923 :
2924 1125366 : xfs_alloc_log_agf(tp, agbp, logflags);
2925 1125367 : *bnop = bno;
2926 :
2927 1125367 : return 0;
2928 : }
2929 :
2930 : /*
2931 : * Log the given fields from the agf structure.
2932 : */
void
xfs_alloc_log_agf(
	struct xfs_trans	*tp,	/* transaction pointer */
	struct xfs_buf		*bp,	/* buffer holding the AGF */
	uint32_t		fields)	/* mask of XFS_AGF_* fields to log */
{
	int			first;	/* first byte offset */
	int			last;	/* last byte offset */
	/*
	 * Byte offset of each loggable AGF field, indexed by field bit
	 * number. Must stay in sync with the XFS_AGF_* flag definitions.
	 */
	static const short	offsets[] = {
		offsetof(xfs_agf_t, agf_magicnum),
		offsetof(xfs_agf_t, agf_versionnum),
		offsetof(xfs_agf_t, agf_seqno),
		offsetof(xfs_agf_t, agf_length),
		offsetof(xfs_agf_t, agf_roots[0]),
		offsetof(xfs_agf_t, agf_levels[0]),
		offsetof(xfs_agf_t, agf_flfirst),
		offsetof(xfs_agf_t, agf_fllast),
		offsetof(xfs_agf_t, agf_flcount),
		offsetof(xfs_agf_t, agf_freeblks),
		offsetof(xfs_agf_t, agf_longest),
		offsetof(xfs_agf_t, agf_btreeblks),
		offsetof(xfs_agf_t, agf_uuid),
		offsetof(xfs_agf_t, agf_rmap_blocks),
		offsetof(xfs_agf_t, agf_refcount_blocks),
		offsetof(xfs_agf_t, agf_refcount_root),
		offsetof(xfs_agf_t, agf_refcount_level),
		/* needed so that we don't log the whole rest of the structure: */
		offsetof(xfs_agf_t, agf_spare64),
		sizeof(xfs_agf_t)
	};

	trace_xfs_agf(tp->t_mountp, bp->b_addr, fields, _RET_IP_);

	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF);

	/* Convert the field mask to a contiguous byte range, then log it. */
	xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last);
	xfs_trans_log_buf(tp, bp, (uint)first, (uint)last);
}
2971 :
2972 : /*
2973 : * Put the block on the freelist for the allocation group.
2974 : */
int
xfs_alloc_put_freelist(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp,
	struct xfs_buf		*agflbp, /* may be NULL; read on demand */
	xfs_agblock_t		bno,	/* AG block to append to the freelist */
	int			btreeblk) /* nonzero if block came from a btree */
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xfs_agf		*agf = agbp->b_addr;
	__be32			*blockp;
	int			error;
	uint32_t		logflags;
	__be32			*agfl_bno;
	int			startoff;

	/* Read the AGFL buffer if the caller didn't supply it. */
	if (!agflbp) {
		error = xfs_alloc_read_agfl(pag, tp, &agflbp);
		if (error)
			return error;
	}

	/* Advance the tail index, wrapping at the end of the AGFL array. */
	be32_add_cpu(&agf->agf_fllast, 1);
	if (be32_to_cpu(agf->agf_fllast) == xfs_agfl_size(mp))
		agf->agf_fllast = 0;

	ASSERT(!xfs_perag_agfl_needs_reset(pag));
	be32_add_cpu(&agf->agf_flcount, 1);
	pag->pagf_flcount++;

	logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
	if (btreeblk) {
		/* Returning a btree block: it no longer counts as in use. */
		be32_add_cpu(&agf->agf_btreeblks, -1);
		pag->pagf_btreeblks--;
		logflags |= XFS_AGF_BTREEBLKS;
	}

	xfs_alloc_log_agf(tp, agbp, logflags);

	ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));

	/* Store the block number into the new tail slot of the AGFL. */
	agfl_bno = xfs_buf_to_agfl_bno(agflbp);
	blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)];
	*blockp = cpu_to_be32(bno);
	startoff = (char *)blockp - (char *)agflbp->b_addr;

	/*
	 * NOTE(review): the AGF is logged a second time here with the same
	 * logflags as above; preserved as-is — confirm whether both calls are
	 * intentional or one is redundant.
	 */
	xfs_alloc_log_agf(tp, agbp, logflags);

	/* Log only the AGFL slot we just wrote. */
	xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF);
	xfs_trans_log_buf(tp, agflbp, startoff,
			  startoff + sizeof(xfs_agblock_t) - 1);
	return 0;
}
3029 :
3030 : /*
3031 : * Check that this AGF/AGI header's sequence number and length matches the AG
3032 : * number and size in fsblocks.
3033 : */
3034 : xfs_failaddr_t
3035 4849320 : xfs_validate_ag_length(
3036 : struct xfs_buf *bp,
3037 : uint32_t seqno,
3038 : uint32_t length)
3039 : {
3040 4849320 : struct xfs_mount *mp = bp->b_mount;
3041 : /*
3042 : * During growfs operations, the perag is not fully initialised,
3043 : * so we can't use it for any useful checking. growfs ensures we can't
3044 : * use it by using uncached buffers that don't have the perag attached
3045 : * so we can detect and avoid this problem.
3046 : */
3047 4849320 : if (bp->b_pag && seqno != bp->b_pag->pag_agno)
3048 0 : return __this_address;
3049 :
3050 : /*
3051 : * Only the last AG in the filesystem is allowed to be shorter
3052 : * than the AG size recorded in the superblock.
3053 : */
3054 4849320 : if (length != mp->m_sb.sb_agblocks) {
3055 : /*
3056 : * During growfs, the new last AG can get here before we
3057 : * have updated the superblock. Give it a pass on the seqno
3058 : * check.
3059 : */
3060 26438 : if (bp->b_pag && seqno != mp->m_sb.sb_agcount - 1)
3061 0 : return __this_address;
3062 26438 : if (length < XFS_MIN_AG_BLOCKS)
3063 0 : return __this_address;
3064 26438 : if (length > mp->m_sb.sb_agblocks)
3065 11 : return __this_address;
3066 : }
3067 :
3068 : return NULL;
3069 : }
3070 :
3071 : /*
3072 : * Verify the AGF is consistent.
3073 : *
3074 : * We do not verify the AGFL indexes in the AGF are fully consistent here
3075 : * because of issues with variable on-disk structure sizes. Instead, we check
3076 : * the agfl indexes for consistency when we initialise the perag from the AGF
3077 : * information after a read completes.
3078 : *
3079 : * If the index is inconsistent, then we mark the perag as needing an AGFL
3080 : * reset. The first AGFL update performed then resets the AGFL indexes and
3081 : * refills the AGFL with known good free blocks, allowing the filesystem to
3082 : * continue operating normally at the cost of a few leaked free space blocks.
3083 : */
static xfs_failaddr_t
xfs_agf_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_agf		*agf = bp->b_addr;
	xfs_failaddr_t		fa;
	uint32_t		agf_seqno = be32_to_cpu(agf->agf_seqno);
	uint32_t		agf_length = be32_to_cpu(agf->agf_length);

	/* CRC-enabled filesystems carry a UUID and LSN that must match. */
	if (xfs_has_crc(mp)) {
		if (!uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid))
			return __this_address;
		if (!xfs_log_check_lsn(mp, be64_to_cpu(agf->agf_lsn)))
			return __this_address;
	}

	if (!xfs_verify_magic(bp, agf->agf_magicnum))
		return __this_address;

	if (!XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)))
		return __this_address;

	/*
	 * Both agf_seqno and agf_length need to validated before anything else
	 * block number related in the AGF or AGFL can be checked.
	 */
	fa = xfs_validate_ag_length(bp, agf_seqno, agf_length);
	if (fa)
		return fa;

	/* AGFL head/tail indices and count must fit within the AGFL array. */
	if (be32_to_cpu(agf->agf_flfirst) >= xfs_agfl_size(mp))
		return __this_address;
	if (be32_to_cpu(agf->agf_fllast) >= xfs_agfl_size(mp))
		return __this_address;
	if (be32_to_cpu(agf->agf_flcount) > xfs_agfl_size(mp))
		return __this_address;

	/* Free space counters must be internally consistent with AG size. */
	if (be32_to_cpu(agf->agf_freeblks) < be32_to_cpu(agf->agf_longest) ||
	    be32_to_cpu(agf->agf_freeblks) > agf_length)
		return __this_address;

	/* Free space btree heights must be sane. */
	if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) < 1 ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]) >
						mp->m_alloc_maxlevels ||
	    be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) >
						mp->m_alloc_maxlevels)
		return __this_address;

	if (xfs_has_lazysbcount(mp) &&
	    be32_to_cpu(agf->agf_btreeblks) > agf_length)
		return __this_address;

	/* rmapbt fields are only meaningful when the feature is enabled. */
	if (xfs_has_rmapbt(mp)) {
		if (be32_to_cpu(agf->agf_rmap_blocks) > agf_length)
			return __this_address;

		if (be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) < 1 ||
		    be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) >
						mp->m_rmap_maxlevels)
			return __this_address;
	}

	/* refcount btree fields likewise require the reflink feature. */
	if (xfs_has_reflink(mp)) {
		if (be32_to_cpu(agf->agf_refcount_blocks) > agf_length)
			return __this_address;

		if (be32_to_cpu(agf->agf_refcount_level) < 1 ||
		    be32_to_cpu(agf->agf_refcount_level) > mp->m_refc_maxlevels)
			return __this_address;
	}

	return NULL;
}
3159 :
3160 : static void
3161 820106 : xfs_agf_read_verify(
3162 : struct xfs_buf *bp)
3163 : {
3164 820106 : struct xfs_mount *mp = bp->b_mount;
3165 820106 : xfs_failaddr_t fa;
3166 :
3167 1640135 : if (xfs_has_crc(mp) &&
3168 : !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF))
3169 52 : xfs_verifier_error(bp, -EFSBADCRC, __this_address);
3170 : else {
3171 820054 : fa = xfs_agf_verify(bp);
3172 820054 : if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_ALLOC_READ_AGF))
3173 65 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
3174 : }
3175 820106 : }
3176 :
static void
xfs_agf_write_verify(
	struct xfs_buf		*bp)
{
	struct xfs_mount	*mp = bp->b_mount;
	struct xfs_buf_log_item	*bip = bp->b_log_item;
	struct xfs_agf		*agf = bp->b_addr;
	xfs_failaddr_t		fa;

	/* Never write out a structurally corrupt AGF. */
	fa = xfs_agf_verify(bp);
	if (fa) {
		xfs_verifier_error(bp, -EFSCORRUPTED, fa);
		return;
	}

	/* Non-CRC filesystems carry no LSN or checksum to update. */
	if (!xfs_has_crc(mp))
		return;

	/* Stamp the LSN of the last modification before checksumming. */
	if (bip)
		agf->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn);

	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);
}
3200 :
/* Buffer ops for AGF blocks; the same magic is used for both array slots. */
const struct xfs_buf_ops xfs_agf_buf_ops = {
	.name = "xfs_agf",
	.magic = { cpu_to_be32(XFS_AGF_MAGIC), cpu_to_be32(XFS_AGF_MAGIC) },
	.verify_read = xfs_agf_read_verify,
	.verify_write = xfs_agf_write_verify,
	.verify_struct = xfs_agf_verify,
};
3208 :
3209 : /*
3210 : * Read in the allocation group header (free/alloc section).
3211 : */
int
xfs_read_agf(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	int			flags,	/* xfs_trans_read_buf flags, e.g. XBF_TRYLOCK */
	struct xfs_buf		**agfbpp) /* out: locked AGF buffer */
{
	struct xfs_mount	*mp = pag->pag_mount;
	int			error;

	trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);

	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
			XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
			XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
	/* Corruption-class errors mark the AGF sick in the AG health state. */
	if (xfs_metadata_is_sick(error))
		xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
	if (error)
		return error;

	/* AGF buffers are hot; raise their LRU reference count. */
	xfs_buf_set_ref(*agfbpp, XFS_AGF_REF);
	return 0;
}
3235 :
3236 : /*
3237 : * Read in the allocation group header (free/alloc section) and initialise the
3238 : * perag structure if necessary. If the caller provides @agfbpp, then return the
3239 : * locked buffer to the caller, otherwise free it.
3240 : */
int
xfs_alloc_read_agf(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	int			flags,	/* XFS_ALLOC_FLAG_TRYLOCK / _FREEING */
	struct xfs_buf		**agfbpp) /* out: AGF buffer, or NULL to release */
{
	struct xfs_buf		*agfbp;
	struct xfs_agf		*agf;
	int			error;
	int			allocbt_blks;

	trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);

	/* We don't support trylock when freeing. */
	ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
			(XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
	error = xfs_read_agf(pag, tp,
			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
			&agfbp);
	if (error)
		return error;

	agf = agfbp->b_addr;
	/* First read of this AGF: seed the in-core perag from the on-disk AGF. */
	if (!xfs_perag_initialised_agf(pag)) {
		pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks);
		pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks);
		pag->pagf_flcount = be32_to_cpu(agf->agf_flcount);
		pag->pagf_longest = be32_to_cpu(agf->agf_longest);
		pag->pagf_levels[XFS_BTNUM_BNOi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]);
		pag->pagf_levels[XFS_BTNUM_CNTi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]);
		pag->pagf_levels[XFS_BTNUM_RMAPi] =
			be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]);
		pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
		/* Flag AGFLs with stale padding so the first update resets them. */
		if (xfs_agfl_needs_reset(pag->pag_mount, agf))
			set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
		else
			clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);

		/*
		 * Update the in-core allocbt counter. Filter out the rmapbt
		 * subset of the btreeblks counter because the rmapbt is managed
		 * by perag reservation. Subtract one for the rmapbt root block
		 * because the rmap counter includes it while the btreeblks
		 * counter only tracks non-root blocks.
		 */
		allocbt_blks = pag->pagf_btreeblks;
		if (xfs_has_rmapbt(pag->pag_mount))
			allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
		if (allocbt_blks > 0)
			atomic64_add(allocbt_blks,
					&pag->pag_mount->m_allocbt_blks);

		set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
	}
#ifdef DEBUG
	/* Debug builds cross-check the perag against the on-disk AGF. */
	else if (!xfs_is_shutdown(pag->pag_mount)) {
		ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
		ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
		ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
		ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest));
		ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]));
		ASSERT(pag->pagf_levels[XFS_BTNUM_CNTi] ==
		       be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]));
	}
#endif
	if (agfbpp)
		*agfbpp = agfbp;
	else
		xfs_trans_brelse(tp, agfbp);
	return 0;
}
3316 :
/*
 * Pre-process allocation arguments to set initial state that we don't require
 * callers to set up correctly, as well as bounds check the allocation args
 * that are set up.
 */
static int
xfs_alloc_vextent_check_args(
	struct xfs_alloc_arg	*args,
	xfs_fsblock_t		target,	/* target block for locality */
	xfs_agnumber_t		*minimum_agno) /* out: lowest AG allowed by tp */
{
	struct xfs_mount	*mp = args->mp;
	xfs_agblock_t		agsize;

	args->fsbno = NULLFSBLOCK;

	/*
	 * Earlier allocations in this transaction may have locked AGFs;
	 * never allocate from a lower-numbered AG than the highest one
	 * already locked, to avoid ABBA AGF deadlocks.
	 */
	*minimum_agno = 0;
	if (args->tp->t_highest_agno != NULLAGNUMBER)
		*minimum_agno = args->tp->t_highest_agno;

	/*
	 * Just fix this up, for the case where the last a.g. is shorter
	 * (or there's only one a.g.) and the caller couldn't easily figure
	 * that out (xfs_bmap_alloc).
	 */
	agsize = mp->m_sb.sb_agblocks;
	if (args->maxlen > agsize)
		args->maxlen = agsize;
	if (args->alignment == 0)
		args->alignment = 1;

	/* Debug builds trap bad callers loudly ... */
	ASSERT(args->minlen > 0);
	ASSERT(args->maxlen > 0);
	ASSERT(args->alignment > 0);
	ASSERT(args->resv != XFS_AG_RESV_AGFL);

	ASSERT(XFS_FSB_TO_AGNO(mp, target) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, target) < agsize);
	ASSERT(args->minlen <= args->maxlen);
	ASSERT(args->minlen <= agsize);
	ASSERT(args->mod < args->prod);

	/* ... while production builds fail the same checks gracefully. */
	if (XFS_FSB_TO_AGNO(mp, target) >= mp->m_sb.sb_agcount ||
	    XFS_FSB_TO_AGBNO(mp, target) >= agsize ||
	    args->minlen > args->maxlen || args->minlen > agsize ||
	    args->mod >= args->prod) {
		trace_xfs_alloc_vextent_badargs(args);
		return -ENOSPC;
	}

	/* A fixed-AG request below the deadlock floor cannot proceed. */
	if (args->agno != NULLAGNUMBER && *minimum_agno > args->agno) {
		trace_xfs_alloc_vextent_skip_deadlock(args);
		return -ENOSPC;
	}
	return 0;

}
3374 :
3375 : /*
3376 : * Prepare an AG for allocation. If the AG is not prepared to accept the
3377 : * allocation, return failure.
3378 : *
3379 : * XXX(dgc): The complexity of "need_pag" will go away as all caller paths are
3380 : * modified to hold their own perag references.
3381 : */
static int
xfs_alloc_vextent_prepare_ag(
	struct xfs_alloc_arg	*args,
	uint32_t		alloc_flags)
{
	/* Track whether we took the perag reference so we release only ours. */
	bool			need_pag = !args->pag;
	int			error;

	if (need_pag)
		args->pag = xfs_perag_get(args->mp, args->agno);

	args->agbp = NULL;
	error = xfs_alloc_fix_freelist(args, alloc_flags);
	if (error) {
		trace_xfs_alloc_vextent_nofix(args);
		if (need_pag)
			xfs_perag_put(args->pag);
		args->agbno = NULLAGBLOCK;
		return error;
	}
	/* No AGF buffer means the freelist fixup rejected this AG (not fatal). */
	if (!args->agbp) {
		/* cannot allocate in this AG at all */
		trace_xfs_alloc_vextent_noagbp(args);
		args->agbno = NULLAGBLOCK;
		return 0;
	}
	args->wasfromfl = 0;
	return 0;
}
3411 :
/*
 * Post-process allocation results to account for the allocation if it
 * succeeded and set the allocated block number correctly for the caller.
 *
 * XXX: we should really be returning ENOSPC for ENOSPC, not
 * hiding it behind a "successful" NULLFSBLOCK allocation.
 */
static int
xfs_alloc_vextent_finish(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		minimum_agno,
	int			alloc_error,	/* error from the allocation attempt */
	bool			drop_perag)	/* release args->pag before return? */
{
	struct xfs_mount	*mp = args->mp;
	int			error = 0;

	/*
	 * We can end up here with a locked AGF. If we failed, the caller is
	 * likely going to try to allocate again with different parameters, and
	 * that can widen the AGs that are searched for free space. If we have
	 * to do BMBT block allocation, we have to do a new allocation.
	 *
	 * Hence leaving this function with the AGF locked opens up potential
	 * ABBA AGF deadlocks because a future allocation attempt in this
	 * transaction may attempt to lock a lower number AGF.
	 *
	 * We can't release the AGF until the transaction is committed, so at
	 * this point we must update the "first allocation" tracker to point at
	 * this AG if the tracker is empty or points to a lower AG. This allows
	 * the next allocation attempt to be modified appropriately to avoid
	 * deadlocks.
	 */
	if (args->agbp &&
	    (args->tp->t_highest_agno == NULLAGNUMBER ||
	     args->agno > minimum_agno))
		args->tp->t_highest_agno = args->agno;

	/*
	 * If the allocation failed with an error or we had an ENOSPC result,
	 * preserve the returned error whilst also marking the allocation result
	 * as "no extent allocated". This ensures that callers that fail to
	 * capture the error will still treat it as a failed allocation.
	 */
	if (alloc_error || args->agbno == NULLAGBLOCK) {
		args->fsbno = NULLFSBLOCK;
		error = alloc_error;
		goto out_drop_perag;
	}

	args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);

	ASSERT(args->len >= args->minlen);
	ASSERT(args->len <= args->maxlen);
	ASSERT(args->agbno % args->alignment == 0);
	XFS_AG_CHECK_DADDR(mp, XFS_FSB_TO_DADDR(mp, args->fsbno), args->len);

	/* if not file data, insert new block into the reverse map btree */
	if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
		error = xfs_rmap_alloc(args->tp, args->agbp, args->pag,
				       args->agbno, args->len, &args->oinfo);
		if (error)
			goto out_drop_perag;
	}

	/* Freelist-sourced blocks were already accounted by the AGFL code. */
	if (!args->wasfromfl) {
		error = xfs_alloc_update_counters(args->tp, args->agbp,
						  -((long)(args->len)));
		if (error)
			goto out_drop_perag;

		ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
					       args->len));
	}

	xfs_ag_resv_alloc_extent(args->pag, args->resv, args);

	XFS_STATS_INC(mp, xs_allocx);
	XFS_STATS_ADD(mp, xs_allocb, args->len);

	trace_xfs_alloc_vextent_finish(args);

out_drop_perag:
	if (drop_perag && args->pag) {
		xfs_perag_rele(args->pag);
		args->pag = NULL;
	}
	return error;
}
3501 :
3502 : /*
3503 : * Allocate within a single AG only. This uses a best-fit length algorithm so if
3504 : * you need an exact sized allocation without locality constraints, this is the
3505 : * fastest way to do it.
3506 : *
3507 : * Caller is expected to hold a perag reference in args->pag.
3508 : */
3509 : int
3510 0 : xfs_alloc_vextent_this_ag(
3511 : struct xfs_alloc_arg *args,
3512 : xfs_agnumber_t agno)
3513 : {
3514 0 : struct xfs_mount *mp = args->mp;
3515 0 : xfs_agnumber_t minimum_agno;
3516 0 : uint32_t alloc_flags = 0;
3517 0 : int error;
3518 :
3519 0 : ASSERT(args->pag != NULL);
3520 0 : ASSERT(args->pag->pag_agno == agno);
3521 :
3522 0 : args->agno = agno;
3523 0 : args->agbno = 0;
3524 :
3525 0 : trace_xfs_alloc_vextent_this_ag(args);
3526 :
3527 0 : error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
3528 : &minimum_agno);
3529 0 : if (error) {
3530 0 : if (error == -ENOSPC)
3531 : return 0;
3532 0 : return error;
3533 : }
3534 :
3535 0 : error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
3536 0 : if (!error && args->agbp)
3537 0 : error = xfs_alloc_ag_vextent_size(args, alloc_flags);
3538 :
3539 0 : return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
3540 : }
3541 :
/*
 * Iterate all AGs trying to allocate an extent starting from @start_ag.
 *
 * If the incoming allocation type is XFS_ALLOCTYPE_NEAR_BNO, it means the
 * allocation attempts in @start_agno have locality information. If we fail to
 * allocate in that AG, then we revert to anywhere-in-AG for all the other AGs
 * we attempt to allocate in as there is no locality optimisation possible for
 * those allocations.
 *
 * On return, args->pag may be left referenced if we finish before the "all
 * failed" return point. The allocation finish still needs the perag, and
 * so the caller will release it once they've finished the allocation.
 *
 * When we wrap the AG iteration at the end of the filesystem, we have to be
 * careful not to wrap into AGs below ones we already have locked in the
 * transaction if we are doing a blocking iteration. This will result in an
 * out-of-order locking of AGFs and hence can cause deadlocks.
 */
static int
xfs_alloc_vextent_iterate_ags(
	struct xfs_alloc_arg	*args,
	xfs_agnumber_t		minimum_agno,	/* lowest AG safe to lock */
	xfs_agnumber_t		start_agno,	/* AG with locality preference */
	xfs_agblock_t		target_agbno,	/* locality target within start AG */
	uint32_t		alloc_flags)
{
	struct xfs_mount	*mp = args->mp;
	xfs_agnumber_t		restart_agno = minimum_agno;
	xfs_agnumber_t		agno;
	int			error = 0;

	/* The non-blocking pass may wrap all the way back to AG 0. */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK)
		restart_agno = 0;
restart:
	for_each_perag_wrap_range(mp, start_agno, restart_agno,
			mp->m_sb.sb_agcount, agno, args->pag) {
		args->agno = agno;
		error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
		if (error)
			break;
		if (!args->agbp) {
			/* This AG rejected us; move on to the next one. */
			trace_xfs_alloc_vextent_loopfailed(args);
			continue;
		}

		/*
		 * Allocation is supposed to succeed now, so break out of the
		 * loop regardless of whether we succeed or not.
		 */
		if (args->agno == start_agno && target_agbno) {
			/* Locality only applies in the starting AG. */
			args->agbno = target_agbno;
			error = xfs_alloc_ag_vextent_near(args, alloc_flags);
		} else {
			args->agbno = 0;
			error = xfs_alloc_ag_vextent_size(args, alloc_flags);
		}
		break;
	}
	if (error) {
		xfs_perag_rele(args->pag);
		args->pag = NULL;
		return error;
	}
	if (args->agbp)
		return 0;

	/*
	 * We didn't find an AG we can allocate from. If we were given
	 * constraining flags by the caller, drop them and retry the allocation
	 * without any constraints being set.
	 */
	if (alloc_flags & XFS_ALLOC_FLAG_TRYLOCK) {
		alloc_flags &= ~XFS_ALLOC_FLAG_TRYLOCK;
		restart_agno = minimum_agno;
		goto restart;
	}

	ASSERT(args->pag == NULL);
	trace_xfs_alloc_vextent_allfailed(args);
	return 0;
}
3623 :
3624 : /*
3625 : * Iterate the AGs from the start AG to the end of the filesystem, trying
3626 : * to allocate blocks. It starts with a near allocation attempt in the initial
3627 : * AG, then falls back to anywhere-in-ag after the first AG fails. It will wrap
3628 : * back to zero if allowed by previous allocations in this transaction,
3629 : * otherwise will wrap back to the start AG and run a second blocking pass to
3630 : * the end of the filesystem.
3631 : */
3632 : int
3633 81101312 : xfs_alloc_vextent_start_ag(
3634 : struct xfs_alloc_arg *args,
3635 : xfs_fsblock_t target)
3636 : {
3637 81101312 : struct xfs_mount *mp = args->mp;
3638 81101312 : xfs_agnumber_t minimum_agno;
3639 81101312 : xfs_agnumber_t start_agno;
3640 81101312 : xfs_agnumber_t rotorstep = xfs_rotorstep;
3641 81101312 : bool bump_rotor = false;
3642 81101312 : uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3643 81101312 : int error;
3644 :
3645 81101312 : ASSERT(args->pag == NULL);
3646 :
3647 81101312 : args->agno = NULLAGNUMBER;
3648 81101312 : args->agbno = NULLAGBLOCK;
3649 :
3650 81101312 : trace_xfs_alloc_vextent_start_ag(args);
3651 :
3652 81081361 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3653 81069580 : if (error) {
3654 0 : if (error == -ENOSPC)
3655 : return 0;
3656 0 : return error;
3657 : }
3658 :
 : /*
 : * On inode32 filesystems, initial user data placement is rotored
 : * across the AGs; override the locality target with the current
 : * rotor position and remember to advance the rotor afterwards.
 : */
3659 84256419 : if ((args->datatype & XFS_ALLOC_INITIAL_USER_DATA) &&
3660 : xfs_is_inode32(mp)) {
3661 0 : target = XFS_AGB_TO_FSB(mp,
3662 : ((mp->m_agfrotor / rotorstep) %
3663 : mp->m_sb.sb_agcount), 0);
3664 0 : bump_rotor = 1;
3665 : }
3666 :
3667 81069580 : start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3668 81096754 : error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
3669 81069580 : XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3670 :
3671 81157189 : if (bump_rotor) {
3672 0 : if (args->agno == start_agno)
3673 0 : mp->m_agfrotor = (mp->m_agfrotor + 1) %
3674 0 : (mp->m_sb.sb_agcount * rotorstep);
3675 : else
3676 0 : mp->m_agfrotor = (args->agno * rotorstep + 1) %
3677 0 : (mp->m_sb.sb_agcount * rotorstep);
3678 : }
3679 :
3680 81157189 : return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3681 : }
3682 :
3683 : /*
3684 : * Iterate from the agno indicated via @target through to the end of the
3685 : * filesystem attempting blocking allocation. This does not wrap or try a second
3686 : * pass, so will not recurse into AGs lower than indicated by the target.
 : *
 : * NOTE(review): XFS_ALLOC_FLAG_TRYLOCK is set below, and the AG
 : * iterator retries a blocking pass from minimum_agno when the trylock
 : * pass fails, so the "no second pass" wording above looks stale -
 : * confirm against xfs_alloc_vextent_iterate_ags().
3687 : */
3688 : int
3689 2732349 : xfs_alloc_vextent_first_ag(
3690 : struct xfs_alloc_arg *args,
3691 : xfs_fsblock_t target)
3692 : {
3693 2732349 : struct xfs_mount *mp = args->mp;
3694 2732349 : xfs_agnumber_t minimum_agno;
3695 2732349 : xfs_agnumber_t start_agno;
3696 2732349 : uint32_t alloc_flags = XFS_ALLOC_FLAG_TRYLOCK;
3697 2732349 : int error;
3698 :
3699 2732349 : ASSERT(args->pag == NULL);
3700 :
3701 2732349 : args->agno = NULLAGNUMBER;
3702 2732349 : args->agbno = NULLAGBLOCK;
3703 :
3704 2732349 : trace_xfs_alloc_vextent_first_ag(args);
3705 :
3706 2729055 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3707 2728947 : if (error) {
3708 0 : if (error == -ENOSPC)
3709 : return 0;
3710 0 : return error;
3711 : }
3712 :
3713 2728947 : start_agno = max(minimum_agno, XFS_FSB_TO_AGNO(mp, target));
3714 2729133 : error = xfs_alloc_vextent_iterate_ags(args, minimum_agno, start_agno,
3715 2728947 : XFS_FSB_TO_AGBNO(mp, target), alloc_flags);
3716 2734874 : return xfs_alloc_vextent_finish(args, minimum_agno, error, true);
3717 : }
3718 :
3719 : /*
3720 : * Allocate at the exact block target or fail. Caller is expected to hold a
3721 : * perag reference in args->pag.
 : *
 : * NOTE(review): an -ENOSPC from argument checking is squashed to a
 : * zero return with nothing allocated - presumably signalled to the
 : * caller via args->fsbno; verify in xfs_alloc_vextent_finish().
3722 : */
3723 : int
3724 1834294 : xfs_alloc_vextent_exact_bno(
3725 : struct xfs_alloc_arg *args,
3726 : xfs_fsblock_t target)
3727 : {
3728 1834294 : struct xfs_mount *mp = args->mp;
3729 1834294 : xfs_agnumber_t minimum_agno;
3730 1834294 : int error;
3731 :
3732 1834294 : ASSERT(args->pag != NULL);
3733 1834294 : ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
3734 :
3735 1834294 : args->agno = XFS_FSB_TO_AGNO(mp, target);
3736 1834294 : args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3737 :
3738 1834174 : trace_xfs_alloc_vextent_exact_bno(args);
3739 :
3740 1834203 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3741 1834232 : if (error) {
3742 478 : if (error == -ENOSPC)
3743 : return 0;
3744 0 : return error;
3745 : }
3746 :
3747 1833754 : error = xfs_alloc_vextent_prepare_ag(args, 0);
3748 1833860 : if (!error && args->agbp)
3749 1052820 : error = xfs_alloc_ag_vextent_exact(args);
3750 :
3751 1833904 : return xfs_alloc_vextent_finish(args, minimum_agno, error, false);
3752 : }
3753 :
3754 : /*
3755 : * Allocate an extent as close to the target as possible. If there are not
3756 : * viable candidates in the AG, then fail the allocation.
3757 : *
3758 : * Caller may or may not have a per-ag reference in args->pag.
 : * If args->pag is NULL on entry we grab a reference here and delegate
 : * its release to xfs_alloc_vextent_finish() via @needs_perag.
3759 : */
3760 : int
3761 3856271 : xfs_alloc_vextent_near_bno(
3762 : struct xfs_alloc_arg *args,
3763 : xfs_fsblock_t target)
3764 : {
3765 3856271 : struct xfs_mount *mp = args->mp;
3766 3856271 : xfs_agnumber_t minimum_agno;
3767 3856271 : bool needs_perag = args->pag == NULL;
3768 3856271 : uint32_t alloc_flags = 0;
3769 3856271 : int error;
3770 :
3771 3856271 : if (!needs_perag)
3772 3647811 : ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
3773 :
3774 3856271 : args->agno = XFS_FSB_TO_AGNO(mp, target);
3775 3856271 : args->agbno = XFS_FSB_TO_AGBNO(mp, target);
3776 :
3777 3856030 : trace_xfs_alloc_vextent_near_bno(args);
3778 :
3779 3856060 : error = xfs_alloc_vextent_check_args(args, target, &minimum_agno);
3780 3856171 : if (error) {
3781 2852 : if (error == -ENOSPC)
3782 : return 0;
3783 0 : return error;
3784 : }
3785 :
3786 3853319 : if (needs_perag)
3787 208356 : args->pag = xfs_perag_grab(mp, args->agno);
3788 :
3789 3853578 : error = xfs_alloc_vextent_prepare_ag(args, alloc_flags);
3790 3853623 : if (!error && args->agbp)
3791 1461621 : error = xfs_alloc_ag_vextent_near(args, alloc_flags);
3792 :
3793 3853602 : return xfs_alloc_vextent_finish(args, minimum_agno, error, needs_perag);
3794 : }
3795 :
3796 : /* Ensure that the freelist is at full capacity. */
3797 : int
3798 505215970 : xfs_free_extent_fix_freelist(
3799 : struct xfs_trans *tp,
3800 : struct xfs_perag *pag,
3801 : struct xfs_buf **agbp)
3802 : {
3803 505215970 : struct xfs_alloc_arg args;
3804 505215970 : int error;
3805 :
3806 505215970 : memset(&args, 0, sizeof(struct xfs_alloc_arg));
3807 505215970 : args.tp = tp;
3808 505215970 : args.mp = tp->t_mountp;
3809 505215970 : args.agno = pag->pag_agno;
3810 505215970 : args.pag = pag;
3811 :
3812 : /*
3813 : * Validate that the AG number is legal - this enables us to detect
3814 : * and handle a silent filesystem corruption rather than crashing.
3815 : */
3816 505215970 : if (args.agno >= args.mp->m_sb.sb_agcount)
3817 : return -EFSCORRUPTED;
3818 :
3819 505215970 : error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING);
3820 505226031 : if (error)
3821 : return error;
3822 :
3823 505224511 : *agbp = args.agbp;
3824 505224511 : return 0;
3825 : }
3826 :
3827 : /*
3828 : * Free an extent.
3829 : * Just break up the extent address and hand off to xfs_free_ag_extent
3830 : * after fixing up the freelist.
 : * On success the freed range is recorded in the extent busy list,
 : * optionally with discard skipped via @skip_discard.
3831 : */
3832 : int
3833 98536263 : __xfs_free_extent(
3834 : struct xfs_trans *tp,
3835 : struct xfs_perag *pag,
3836 : xfs_agblock_t agbno,
3837 : xfs_extlen_t len,
3838 : const struct xfs_owner_info *oinfo,
3839 : enum xfs_ag_resv_type type,
3840 : bool skip_discard)
3841 : {
3842 98536263 : struct xfs_mount *mp = tp->t_mountp;
3843 98536263 : struct xfs_buf *agbp;
3844 98536263 : struct xfs_agf *agf;
3845 98536263 : int error;
3846 98536263 : unsigned int busy_flags = 0;
3847 :
3848 98536263 : ASSERT(len != 0);
3849 98536263 : ASSERT(type != XFS_AG_RESV_AGFL);
3850 :
 : /* Error injection point for exercising free-extent failures. */
3851 98536263 : if (XFS_TEST_ERROR(false, mp,
3852 : XFS_ERRTAG_FREE_EXTENT))
3853 : return -EIO;
3854 :
3855 98531201 : error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
3856 98551884 : if (error) {
3857 34 : if (xfs_metadata_is_sick(error))
3858 0 : xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
3859 34 : return error;
3860 : }
3861 :
3862 98551850 : agf = agbp->b_addr;
3863 :
3864 98551850 : if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
3865 0 : xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
3866 0 : error = -EFSCORRUPTED;
3867 0 : goto err_release;
3868 : }
3869 :
3870 : /* validate the extent size is legal now we have the agf locked */
3871 98551850 : if (XFS_IS_CORRUPT(mp, agbno + len > be32_to_cpu(agf->agf_length))) {
3872 0 : xfs_ag_mark_sick(pag, XFS_SICK_AG_BNOBT);
3873 0 : error = -EFSCORRUPTED;
3874 0 : goto err_release;
3875 : }
3876 :
3877 98551850 : error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
3878 : type);
3879 98539070 : if (error)
3880 278 : goto err_release;
3881 :
3882 98538792 : if (skip_discard)
3883 6993138 : busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
3884 98538792 : xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
3885 98538792 : return 0;
3886 :
3887 278 : err_release:
3888 278 : xfs_trans_brelse(tp, agbp);
3889 278 : return error;
3890 : }
3891 :
3892 : struct xfs_alloc_query_range_info {
3893 : xfs_alloc_query_range_fn fn; /* per-record callback */
3894 : void *priv; /* caller context passed unchanged to @fn */
3895 : };
3896 :
3897 : /* Convert the btree record to in-core form, validate it, and pass it to our callback. */
3898 : STATIC int
3899 2264400203 : xfs_alloc_query_range_helper(
3900 : struct xfs_btree_cur *cur,
3901 : const union xfs_btree_rec *rec,
3902 : void *priv)
3903 : {
3904 2264400203 : struct xfs_alloc_query_range_info *query = priv;
3905 2264400203 : struct xfs_alloc_rec_incore irec;
3906 2264400203 : xfs_failaddr_t fa;
3907 :
3908 2264400203 : xfs_alloc_btrec_to_irec(rec, &irec);
3909 2264400203 : fa = xfs_alloc_check_irec(cur, &irec);
3910 2264371165 : if (fa)
3911 0 : return xfs_alloc_complain_bad_rec(cur, fa, &irec);
3912 :
3913 2264371165 : return query->fn(cur, &irec, query->priv);
3914 : }
3915 :
3916 : /* Find all free space within a given range of blocks. Requires a bnobt cursor. */
3917 : int
3918 57341 : xfs_alloc_query_range(
3919 : struct xfs_btree_cur *cur,
3920 : const struct xfs_alloc_rec_incore *low_rec,
3921 : const struct xfs_alloc_rec_incore *high_rec,
3922 : xfs_alloc_query_range_fn fn,
3923 : void *priv)
3924 : {
3925 57341 : union xfs_btree_irec low_brec = { .a = *low_rec };
3926 57341 : union xfs_btree_irec high_brec = { .a = *high_rec };
3927 57341 : struct xfs_alloc_query_range_info query = { .priv = priv, .fn = fn };
3928 :
3929 57341 : ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3930 57341 : return xfs_btree_query_range(cur, &low_brec, &high_brec,
3931 : xfs_alloc_query_range_helper, &query);
3932 : }
3933 :
3934 : /* Find all free space records. Requires a bnobt cursor. */
3935 : int
3936 685695 : xfs_alloc_query_all(
3937 : struct xfs_btree_cur *cur,
3938 : xfs_alloc_query_range_fn fn,
3939 : void *priv)
3940 : {
3941 685695 : struct xfs_alloc_query_range_info query;
3942 :
3943 685695 : ASSERT(cur->bc_btnum == XFS_BTNUM_BNO);
3944 685695 : query.priv = priv;
3945 685695 : query.fn = fn;
3946 685695 : return xfs_btree_query_all(cur, xfs_alloc_query_range_helper, &query);
3947 : }
3948 :
3949 : /*
3950 : * Scan part of the keyspace of the free space and tell us if the area has no
3951 : * records, is fully mapped by records, or is partially filled.
3952 : */
3953 : int
3954 1458622660 : xfs_alloc_has_records(
3955 : struct xfs_btree_cur *cur,
3956 : xfs_agblock_t bno,
3957 : xfs_extlen_t len,
3958 : enum xbtree_recpacking *outcome)
3959 : {
3960 1458622660 : union xfs_btree_irec low;
3961 1458622660 : union xfs_btree_irec high;
3962 :
 : /* Query keys cover the inclusive block range [bno, bno + len - 1]. */
3963 1458622660 : memset(&low, 0, sizeof(low));
3964 1458622660 : low.a.ar_startblock = bno;
3965 1458622660 : memset(&high, 0xFF, sizeof(high));
3966 1458622660 : high.a.ar_startblock = bno + len - 1;
3967 :
3968 1458622660 : return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
3969 : }
3970 :
3971 : /*
3972 : * Walk all the blocks in the AGFL. The @walk_fn can return any negative
3973 : * error code or XFS_ITER_*.
 : * Any non-zero return from @walk_fn terminates the walk and is
 : * returned to the caller.
3974 : */
3975 : int
3976 59312116 : xfs_agfl_walk(
3977 : struct xfs_mount *mp,
3978 : struct xfs_agf *agf,
3979 : struct xfs_buf *agflbp,
3980 : xfs_agfl_walk_fn walk_fn,
3981 : void *priv)
3982 : {
3983 59312116 : __be32 *agfl_bno;
3984 59312116 : unsigned int i;
3985 59312116 : int error;
3986 :
3987 59312116 : agfl_bno = xfs_buf_to_agfl_bno(agflbp);
3988 59312116 : i = be32_to_cpu(agf->agf_flfirst);
3989 :
3990 : /* Nothing to walk in an empty AGFL. */
3991 59312116 : if (agf->agf_flcount == cpu_to_be32(0))
3992 : return 0;
3993 :
3994 : /* Otherwise, walk from first to last, wrapping as needed. */
3995 573821857 : for (;;) {
3996 573821857 : error = walk_fn(mp, be32_to_cpu(agfl_bno[i]), priv);
3997 573822008 : if (error)
3998 2226582 : return error;
3999 571595426 : if (i == be32_to_cpu(agf->agf_fllast))
4000 : break;
 : /* The AGFL is a circular buffer; wrap the index at its size. */
4001 1029078220 : if (++i == xfs_agfl_size(mp))
4002 501264 : i = 0;
4003 : }
4004 :
4005 : return 0;
4006 : }
4007 :
 : /* Allocate the extent free intent cache; returns -ENOMEM on failure. */
4008 : int __init
4009 50 : xfs_extfree_intent_init_cache(void)
4010 : {
4011 50 : xfs_extfree_item_cache = kmem_cache_create("xfs_extfree_intent",
4012 : sizeof(struct xfs_extent_free_item),
4013 : 0, 0, NULL);
4014 :
4015 50 : return xfs_extfree_item_cache != NULL ? 0 : -ENOMEM;
4016 : }
4017 :
 : /* Tear down the extent free intent cache and clear the pointer. */
4018 : void
4019 49 : xfs_extfree_intent_destroy_cache(void)
4020 : {
4021 49 : kmem_cache_destroy(xfs_extfree_item_cache);
4022 49 : xfs_extfree_item_cache = NULL;
4023 49 : }
|