Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0+
2 : /*
3 : * Copyright (C) 2016 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_defer.h"
14 : #include "xfs_btree.h"
15 : #include "xfs_bmap.h"
16 : #include "xfs_refcount_btree.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_errortag.h"
19 : #include "xfs_error.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_trans.h"
22 : #include "xfs_bit.h"
23 : #include "xfs_refcount.h"
24 : #include "xfs_rmap.h"
25 : #include "xfs_ag.h"
26 : #include "xfs_health.h"
27 :
/*
 * Cache pointer for refcount intent items.  It is only defined here; nothing
 * in this part of the file allocates from it or initializes it.
 */
28 : struct kmem_cache *xfs_refcount_intent_cache;
29 :
/*
 * The numeric value of each operation is the delta that
 * xfs_refc_merge_refcount() adds to a record's refcount.  Note that
 * XFS_REFCOUNT_ADJUST_COW_FREE deliberately or accidentally shares the value
 * -1 with XFS_REFCOUNT_ADJUST_DECREASE, so the two enumerators cannot be told
 * apart by value.  NOTE(review): confirm no caller needs to distinguish them.
 */
30 : /* Allowable refcount adjustment amounts. */
31 : enum xfs_refc_adjust_op {
32 : XFS_REFCOUNT_ADJUST_INCREASE = 1,
33 : XFS_REFCOUNT_ADJUST_DECREASE = -1,
34 : XFS_REFCOUNT_ADJUST_COW_ALLOC = 0,
35 : XFS_REFCOUNT_ADJUST_COW_FREE = -1,
36 : };
37 :
/* Forward declarations; the definitions are not in this part of the file. */
38 : STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
39 : xfs_agblock_t agbno, xfs_extlen_t aglen);
40 : STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
41 : xfs_agblock_t agbno, xfs_extlen_t aglen);
42 :
43 : /*
44 : * Look up the first record less than or equal to [bno, len] in the btree
45 : * given by cur.
46 : */
47 : int
48 1641912327 : xfs_refcount_lookup_le(
49 : struct xfs_btree_cur *cur,
50 : enum xfs_refc_domain domain,
51 : xfs_agblock_t bno,
52 : int *stat)
53 : {
54 1709057807 : trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
55 : xfs_refcount_encode_startblock(bno, domain),
56 : XFS_LOOKUP_LE);
57 1641110185 : cur->bc_rec.rc.rc_startblock = bno;
58 1641110185 : cur->bc_rec.rc.rc_blockcount = 0;
59 1641110185 : cur->bc_rec.rc.rc_domain = domain;
60 1641110185 : return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
61 : }
62 :
63 : /*
64 : * Look up the first record greater than or equal to [bno, len] in the btree
65 : * given by cur.
66 : */
67 : int
68 451121031 : xfs_refcount_lookup_ge(
69 : struct xfs_btree_cur *cur,
70 : enum xfs_refc_domain domain,
71 : xfs_agblock_t bno,
72 : int *stat)
73 : {
74 504081680 : trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
75 : xfs_refcount_encode_startblock(bno, domain),
76 : XFS_LOOKUP_GE);
77 451120957 : cur->bc_rec.rc.rc_startblock = bno;
78 451120957 : cur->bc_rec.rc.rc_blockcount = 0;
79 451120957 : cur->bc_rec.rc.rc_domain = domain;
80 451120957 : return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
81 : }
82 :
83 : /*
84 : * Look up the first record equal to [bno, len] in the btree
85 : * given by cur.
86 : */
87 : int
88 0 : xfs_refcount_lookup_eq(
89 : struct xfs_btree_cur *cur,
90 : enum xfs_refc_domain domain,
91 : xfs_agblock_t bno,
92 : int *stat)
93 : {
94 0 : trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
95 : xfs_refcount_encode_startblock(bno, domain),
96 : XFS_LOOKUP_LE);
97 0 : cur->bc_rec.rc.rc_startblock = bno;
98 0 : cur->bc_rec.rc.rc_blockcount = 0;
99 0 : cur->bc_rec.rc.rc_domain = domain;
100 0 : return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
101 : }
102 :
103 : /* Convert on-disk record to in-core format. */
104 : void
105 2774181694 : xfs_refcount_btrec_to_irec(
106 : const union xfs_btree_rec *rec,
107 : struct xfs_refcount_irec *irec)
108 : {
109 2774181694 : uint32_t start;
110 :
111 2774181694 : start = be32_to_cpu(rec->refc.rc_startblock);
112 2774181694 : if (start & XFS_REFC_COWFLAG) {
113 183004419 : start &= ~XFS_REFC_COWFLAG;
114 183004419 : irec->rc_domain = XFS_REFC_DOMAIN_COW;
115 : } else {
116 2591177275 : irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
117 : }
118 :
119 2774181694 : irec->rc_startblock = start;
120 2774181694 : irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
121 2774181694 : irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
122 2774181694 : }
123 :
/*
 * Sanity-check an in-core refcount record against the given AG.
 *
 * Returns NULL if every check passes.  Otherwise returns __this_address from
 * the first failing check (presumably a code address identifying that check;
 * callers in this file print it with %pS).  The checks run in this order:
 * block count, domain, extent range, reference count.
 */
124 : inline xfs_failaddr_t
125 2781171286 : xfs_refcount_check_perag_irec(
126 : struct xfs_perag *pag,
127 : const struct xfs_refcount_irec *irec)
128 : {
/* The length must be nonzero and no larger than MAXREFCEXTLEN. */
129 2781171286 : if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
130 0 : return __this_address;
131 :
/* Domain validity is delegated to xfs_refcount_check_domain(). */
132 2781171286 : if (!xfs_refcount_check_domain(irec))
133 0 : return __this_address;
134 :
135 : /* check for valid extent range, including overflow */
136 2781171286 : if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
137 10 : return __this_address;
138 :
/* The reference count must be nonzero and no larger than MAXREFCOUNT. */
139 2781171276 : if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
140 0 : return __this_address;
141 :
142 : return NULL;
143 : }
144 :
145 : /* Simple checks for refcount records. */
146 : xfs_failaddr_t
147 55773022 : xfs_refcount_check_irec(
148 : struct xfs_btree_cur *cur,
149 : const struct xfs_refcount_irec *irec)
150 : {
151 55773022 : return xfs_refcount_check_perag_irec(cur->bc_ag.pag, irec);
152 : }
153 :
154 : static inline int
155 10 : xfs_refcount_complain_bad_rec(
156 : struct xfs_btree_cur *cur,
157 : xfs_failaddr_t fa,
158 : const struct xfs_refcount_irec *irec)
159 : {
160 10 : struct xfs_mount *mp = cur->bc_mp;
161 :
162 10 : xfs_warn(mp,
163 : "Refcount BTree record corruption in AG %d detected at %pS!",
164 : cur->bc_ag.pag->pag_agno, fa);
165 10 : xfs_warn(mp,
166 : "Start block 0x%x, block count 0x%x, references 0x%x",
167 : irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
168 10 : xfs_btree_mark_sick(cur);
169 10 : return -EFSCORRUPTED;
170 : }
171 :
172 : /*
173 : * Get the data from the pointed-to record.
174 : */
175 : int
176 2729238571 : xfs_refcount_get_rec(
177 : struct xfs_btree_cur *cur,
178 : struct xfs_refcount_irec *irec,
179 : int *stat)
180 : {
181 2729238571 : union xfs_btree_rec *rec;
182 2729238571 : xfs_failaddr_t fa;
183 2729238571 : int error;
184 :
185 2729238571 : error = xfs_btree_get_rec(cur, &rec, stat);
186 2728689769 : if (error || !*stat)
187 : return error;
188 :
189 2717609326 : xfs_refcount_btrec_to_irec(rec, irec);
190 2717040890 : fa = xfs_refcount_check_irec(cur, irec);
191 2717130275 : if (fa)
192 10 : return xfs_refcount_complain_bad_rec(cur, fa, irec);
193 :
194 2717130265 : trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
195 2717130265 : return 0;
196 : }
197 :
198 : /*
199 : * Update the record referred to by cur to the value given
200 : * by [bno, len, refcount].
201 : * This either works (return 0) or gets an EFSCORRUPTED error.
202 : */
203 : STATIC int
204 139493869 : xfs_refcount_update(
205 : struct xfs_btree_cur *cur,
206 : struct xfs_refcount_irec *irec)
207 : {
208 139493869 : union xfs_btree_rec rec;
209 139493869 : uint32_t start;
210 139493869 : int error;
211 :
212 139493869 : trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
213 :
214 139493760 : start = xfs_refcount_encode_startblock(irec->rc_startblock,
215 : irec->rc_domain);
216 139493760 : rec.refc.rc_startblock = cpu_to_be32(start);
217 139493760 : rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
218 139493760 : rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
219 :
220 139493760 : error = xfs_btree_update(cur, &rec);
221 139493753 : if (error)
222 0 : trace_xfs_refcount_update_error(cur->bc_mp,
223 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
224 139493753 : return error;
225 : }
226 :
227 : /*
228 : * Insert the record referred to by cur to the value given
229 : * by [bno, len, refcount].
230 : * This either works (return 0) or gets an EFSCORRUPTED error.
231 : */
232 : int
233 77265551 : xfs_refcount_insert(
234 : struct xfs_btree_cur *cur,
235 : struct xfs_refcount_irec *irec,
236 : int *i)
237 : {
238 77265551 : int error;
239 :
240 77265551 : trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
241 :
242 77265326 : cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
243 77265326 : cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
244 77265326 : cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
245 77265326 : cur->bc_rec.rc.rc_domain = irec->rc_domain;
246 :
247 77265326 : error = xfs_btree_insert(cur, i);
248 77265549 : if (error)
249 81 : goto out_error;
250 77265468 : if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
251 0 : xfs_btree_mark_sick(cur);
252 0 : error = -EFSCORRUPTED;
253 0 : goto out_error;
254 : }
255 :
256 77265549 : out_error:
257 77265549 : if (error)
258 162 : trace_xfs_refcount_insert_error(cur->bc_mp,
259 81 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
260 77265549 : return error;
261 : }
262 :
263 : /*
264 : * Remove the record referred to by cur, then set the pointer to the spot
265 : * where the record could be re-inserted, in case we want to increment or
266 : * decrement the cursor.
267 : * This either works (return 0) or gets an EFSCORRUPTED error.
268 : */
269 : STATIC int
270 71031853 : xfs_refcount_delete(
271 : struct xfs_btree_cur *cur,
272 : int *i)
273 : {
274 71031853 : struct xfs_refcount_irec irec;
275 71031853 : int found_rec;
276 71031853 : int error;
277 :
278 71031853 : error = xfs_refcount_get_rec(cur, &irec, &found_rec);
279 71033829 : if (error)
280 0 : goto out_error;
281 71033829 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
282 0 : xfs_btree_mark_sick(cur);
283 0 : error = -EFSCORRUPTED;
284 0 : goto out_error;
285 : }
286 71033829 : trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
287 71033469 : error = xfs_btree_delete(cur, i);
288 71031065 : if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
289 0 : xfs_btree_mark_sick(cur);
290 0 : error = -EFSCORRUPTED;
291 0 : goto out_error;
292 : }
293 71031065 : if (error)
294 0 : goto out_error;
295 71031065 : error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
296 : &found_rec);
297 71033623 : out_error:
298 71033623 : if (error)
299 0 : trace_xfs_refcount_delete_error(cur->bc_mp,
300 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
301 71033623 : return error;
302 : }
303 :
304 : /*
305 : * Adjusting the Reference Count
306 : *
307 : * As stated elsewhere, the reference count btree (refcbt) stores
308 : * >1 reference counts for extents of physical blocks. In this
309 : * operation, we're either raising or lowering the reference count of
310 : * some subrange stored in the tree:
311 : *
312 : * <------ adjustment range ------>
313 : * ----+ +---+-----+ +--+--------+---------
314 : * 2 | | 3 | 2 | |17| 55 | 10
315 : * ----+ +---+-----+ +--+--------+---------
316 : * X axis is physical block number;
317 : * reference counts are the numbers inside the rectangles
318 : *
319 : * The first thing we need to do is to ensure that there are no
320 : * refcount extents crossing either boundary of the range to be
321 : * adjusted. For any extent that does cross a boundary, split it into
322 : * two extents so that we can increment the refcount of one of the
323 : * pieces later:
324 : *
325 : * <------ adjustment range ------>
326 : * ----+ +---+-----+ +--+--------+----+----
327 : * 2 | | 3 | 2 | |17| 55 | 10 | 10
328 : * ----+ +---+-----+ +--+--------+----+----
329 : *
330 : * For this next step, let's assume that all the physical blocks in
331 : * the adjustment range are mapped to a file and are therefore in use
332 : * at least once. Therefore, we can infer that any gap in the
333 : * refcount tree within the adjustment range represents a physical
334 : * extent with refcount == 1:
335 : *
336 : * <------ adjustment range ------>
337 : * ----+---+---+-----+-+--+--------+----+----
338 : * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10
339 : * ----+---+---+-----+-+--+--------+----+----
340 : * ^
341 : *
342 : * For each extent that falls within the interval range, figure out
343 : * which extent is to the left or the right of that extent. Now we
344 : * have a left, current, and right extent. If the new reference count
345 : * of the center extent enables us to merge left, center, and right
346 : * into one record covering all three, do so. If the center extent is
347 : * at the left end of the range, abuts the left extent, and its new
348 : * reference count matches the left extent's record, then merge them.
349 : * If the center extent is at the right end of the range, abuts the
350 : * right extent, and the reference counts match, merge those. In the
351 : * example, we can left merge (assuming an increment operation):
352 : *
353 : * <------ adjustment range ------>
354 : * --------+---+-----+-+--+--------+----+----
355 : * 2 | 3 | 2 |1|17| 55 | 10 | 10
356 : * --------+---+-----+-+--+--------+----+----
357 : * ^
358 : *
359 : * For all other extents within the range, adjust the reference count
360 : * or delete it if the refcount falls below 2. If we were
361 : * incrementing, the end result looks like this:
362 : *
363 : * <------ adjustment range ------>
364 : * --------+---+-----+-+--+--------+----+----
365 : * 2 | 4 | 3 |2|18| 56 | 11 | 10
366 : * --------+---+-----+-+--+--------+----+----
367 : *
368 : * The result of a decrement operation looks as such:
369 : *
370 : * <------ adjustment range ------>
371 : * ----+ +---+ +--+--------+----+----
372 : * 2 | | 2 | |16| 54 | 9 | 10
373 : * ----+ +---+ +--+--------+----+----
374 : * DDDD 111111DD
375 : *
376 : * The blocks marked "D" are freed; the blocks marked "1" are only
377 : * referenced once and therefore the record is removed from the
378 : * refcount btree.
379 : */
380 :
381 : /* Next block after this extent. */
382 : static inline xfs_agblock_t
383 : xfs_refc_next(
384 : struct xfs_refcount_irec *rc)
385 : {
386 473375409 : return rc->rc_startblock + rc->rc_blockcount;
387 : }
388 :
389 : /*
390 : * Split a refcount extent that crosses agbno.
391 : */
392 : STATIC int
393 386269508 : xfs_refcount_split_extent(
394 : struct xfs_btree_cur *cur,
395 : enum xfs_refc_domain domain,
396 : xfs_agblock_t agbno,
397 : bool *shape_changed)
398 : {
399 386269508 : struct xfs_refcount_irec rcext, tmp;
400 386269508 : int found_rec;
401 386269508 : int error;
402 :
403 386269508 : *shape_changed = false;
404 386269508 : error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
405 386282643 : if (error)
406 196 : goto out_error;
407 386282447 : if (!found_rec)
408 : return 0;
409 :
410 375590970 : error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
411 375585743 : if (error)
412 0 : goto out_error;
413 375585743 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
414 0 : xfs_btree_mark_sick(cur);
415 0 : error = -EFSCORRUPTED;
416 0 : goto out_error;
417 : }
418 375585743 : if (rcext.rc_domain != domain)
419 : return 0;
420 374927841 : if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
421 : return 0;
422 :
423 45879072 : *shape_changed = true;
424 45879072 : trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
425 : &rcext, agbno);
426 :
427 : /* Establish the right extent. */
428 45878968 : tmp = rcext;
429 45878968 : tmp.rc_startblock = agbno;
430 45878968 : tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
431 45878968 : error = xfs_refcount_update(cur, &tmp);
432 45878862 : if (error)
433 0 : goto out_error;
434 :
435 : /* Insert the left extent. */
436 45878862 : tmp = rcext;
437 45878862 : tmp.rc_blockcount = agbno - rcext.rc_startblock;
438 45878862 : error = xfs_refcount_insert(cur, &tmp, &found_rec);
439 45878823 : if (error)
440 37 : goto out_error;
441 45878786 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
442 0 : xfs_btree_mark_sick(cur);
443 0 : error = -EFSCORRUPTED;
444 0 : goto out_error;
445 : }
446 : return error;
447 :
448 233 : out_error:
449 466 : trace_xfs_refcount_split_extent_error(cur->bc_mp,
450 233 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
451 233 : return error;
452 : }
453 :
454 : /*
455 : * Merge the left, center, and right extents.
456 : */
457 : STATIC int
458 2475445 : xfs_refcount_merge_center_extents(
459 : struct xfs_btree_cur *cur,
460 : struct xfs_refcount_irec *left,
461 : struct xfs_refcount_irec *center,
462 : struct xfs_refcount_irec *right,
463 : unsigned long long extlen,
464 : xfs_extlen_t *aglen)
465 : {
466 2475445 : int error;
467 2475445 : int found_rec;
468 :
469 2475445 : trace_xfs_refcount_merge_center_extents(cur->bc_mp,
470 2475445 : cur->bc_ag.pag->pag_agno, left, center, right);
471 :
472 2475444 : ASSERT(left->rc_domain == center->rc_domain);
473 2475444 : ASSERT(right->rc_domain == center->rc_domain);
474 :
475 : /*
476 : * Make sure the center and right extents are not in the btree.
477 : * If the center extent was synthesized, the first delete call
478 : * removes the right extent and we skip the second deletion.
479 : * If center and right were in the btree, then the first delete
480 : * call removes the center and the second one removes the right
481 : * extent.
482 : */
483 2475444 : error = xfs_refcount_lookup_ge(cur, center->rc_domain,
484 : center->rc_startblock, &found_rec);
485 2475445 : if (error)
486 0 : goto out_error;
487 2475445 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
488 0 : xfs_btree_mark_sick(cur);
489 0 : error = -EFSCORRUPTED;
490 0 : goto out_error;
491 : }
492 :
493 2475445 : error = xfs_refcount_delete(cur, &found_rec);
494 2475444 : if (error)
495 0 : goto out_error;
496 2475444 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
497 0 : xfs_btree_mark_sick(cur);
498 0 : error = -EFSCORRUPTED;
499 0 : goto out_error;
500 : }
501 :
502 2475444 : if (center->rc_refcount > 1) {
503 454541 : error = xfs_refcount_delete(cur, &found_rec);
504 454541 : if (error)
505 0 : goto out_error;
506 454541 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
507 0 : xfs_btree_mark_sick(cur);
508 0 : error = -EFSCORRUPTED;
509 0 : goto out_error;
510 : }
511 : }
512 :
513 : /* Enlarge the left extent. */
514 2475444 : error = xfs_refcount_lookup_le(cur, left->rc_domain,
515 : left->rc_startblock, &found_rec);
516 2475445 : if (error)
517 0 : goto out_error;
518 2475445 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
519 0 : xfs_btree_mark_sick(cur);
520 0 : error = -EFSCORRUPTED;
521 0 : goto out_error;
522 : }
523 :
524 2475445 : left->rc_blockcount = extlen;
525 2475445 : error = xfs_refcount_update(cur, left);
526 2475444 : if (error)
527 0 : goto out_error;
528 :
529 2475444 : *aglen = 0;
530 2475444 : return error;
531 :
532 0 : out_error:
533 0 : trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
534 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
535 0 : return error;
536 : }
537 :
538 : /*
539 : * Merge with the left extent.
540 : */
541 : STATIC int
542 8865323 : xfs_refcount_merge_left_extent(
543 : struct xfs_btree_cur *cur,
544 : struct xfs_refcount_irec *left,
545 : struct xfs_refcount_irec *cleft,
546 : xfs_agblock_t *agbno,
547 : xfs_extlen_t *aglen)
548 : {
549 8865323 : int error;
550 8865323 : int found_rec;
551 :
552 8865323 : trace_xfs_refcount_merge_left_extent(cur->bc_mp,
553 8865323 : cur->bc_ag.pag->pag_agno, left, cleft);
554 :
555 8865314 : ASSERT(left->rc_domain == cleft->rc_domain);
556 :
557 : /* If the extent at agbno (cleft) wasn't synthesized, remove it. */
558 8865314 : if (cleft->rc_refcount > 1) {
559 2649656 : error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
560 : cleft->rc_startblock, &found_rec);
561 2649658 : if (error)
562 0 : goto out_error;
563 2649658 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
564 0 : xfs_btree_mark_sick(cur);
565 0 : error = -EFSCORRUPTED;
566 0 : goto out_error;
567 : }
568 :
569 2649658 : error = xfs_refcount_delete(cur, &found_rec);
570 2649658 : if (error)
571 0 : goto out_error;
572 2649658 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
573 0 : xfs_btree_mark_sick(cur);
574 0 : error = -EFSCORRUPTED;
575 0 : goto out_error;
576 : }
577 : }
578 :
579 : /* Enlarge the left extent. */
580 8865316 : error = xfs_refcount_lookup_le(cur, left->rc_domain,
581 : left->rc_startblock, &found_rec);
582 8865339 : if (error)
583 0 : goto out_error;
584 8865339 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
585 0 : xfs_btree_mark_sick(cur);
586 0 : error = -EFSCORRUPTED;
587 0 : goto out_error;
588 : }
589 :
590 8865339 : left->rc_blockcount += cleft->rc_blockcount;
591 8865339 : error = xfs_refcount_update(cur, left);
592 8865339 : if (error)
593 0 : goto out_error;
594 :
595 8865339 : *agbno += cleft->rc_blockcount;
596 8865339 : *aglen -= cleft->rc_blockcount;
597 8865339 : return error;
598 :
599 0 : out_error:
600 0 : trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
601 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
602 0 : return error;
603 : }
604 :
605 : /*
606 : * Merge with the right extent.
607 : */
608 : STATIC int
609 4393683 : xfs_refcount_merge_right_extent(
610 : struct xfs_btree_cur *cur,
611 : struct xfs_refcount_irec *right,
612 : struct xfs_refcount_irec *cright,
613 : xfs_extlen_t *aglen)
614 : {
615 4393683 : int error;
616 4393683 : int found_rec;
617 :
618 4393683 : trace_xfs_refcount_merge_right_extent(cur->bc_mp,
619 4393683 : cur->bc_ag.pag->pag_agno, cright, right);
620 :
621 4393683 : ASSERT(right->rc_domain == cright->rc_domain);
622 :
623 : /*
624 : * If the extent ending at agbno+aglen (cright) wasn't synthesized,
625 : * remove it.
626 : */
627 4393683 : if (cright->rc_refcount > 1) {
628 2011680 : error = xfs_refcount_lookup_le(cur, cright->rc_domain,
629 : cright->rc_startblock, &found_rec);
630 2011681 : if (error)
631 0 : goto out_error;
632 2011681 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
633 0 : xfs_btree_mark_sick(cur);
634 0 : error = -EFSCORRUPTED;
635 0 : goto out_error;
636 : }
637 :
638 2011681 : error = xfs_refcount_delete(cur, &found_rec);
639 2011681 : if (error)
640 0 : goto out_error;
641 2011681 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
642 0 : xfs_btree_mark_sick(cur);
643 0 : error = -EFSCORRUPTED;
644 0 : goto out_error;
645 : }
646 : }
647 :
648 : /* Enlarge the right extent. */
649 4393684 : error = xfs_refcount_lookup_le(cur, right->rc_domain,
650 : right->rc_startblock, &found_rec);
651 4393684 : if (error)
652 0 : goto out_error;
653 4393684 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
654 0 : xfs_btree_mark_sick(cur);
655 0 : error = -EFSCORRUPTED;
656 0 : goto out_error;
657 : }
658 :
659 4393684 : right->rc_startblock -= cright->rc_blockcount;
660 4393684 : right->rc_blockcount += cright->rc_blockcount;
661 4393684 : error = xfs_refcount_update(cur, right);
662 4393684 : if (error)
663 0 : goto out_error;
664 :
665 4393684 : *aglen -= cright->rc_blockcount;
666 4393684 : return error;
667 :
668 0 : out_error:
669 0 : trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
670 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
671 0 : return error;
672 : }
673 :
674 : /*
675 : * Find the left extent and the one after it (cleft). This function assumes
676 : * that we've already split any extent crossing agbno.
 *
 * On return, left is valid (rc_startblock != NULLAGBLOCK) only if a record
 * in the requested domain ends exactly at agbno.  In that case cleft is also
 * filled in: either with the real record that starts at agbno, or with a
 * synthesized refcount == 1 record standing in for the gap at agbno.  If no
 * such left neighbour exists, both left and cleft keep NULLAGBLOCK as their
 * start block and 0 is returned.
 *
 * Returns 0 on success or a negative errno, which is also traced.
677 : */
678 : STATIC int
679 193141020 : xfs_refcount_find_left_extents(
680 : struct xfs_btree_cur *cur,
681 : struct xfs_refcount_irec *left,
682 : struct xfs_refcount_irec *cleft,
683 : enum xfs_refc_domain domain,
684 : xfs_agblock_t agbno,
685 : xfs_extlen_t aglen)
686 : {
687 193141020 : struct xfs_refcount_irec tmp;
688 193141020 : int error;
689 193141020 : int found_rec;
690 :
/* Start with both outputs marked invalid; see xfs_refc_valid(). */
691 193141020 : left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
/* Look for the record covering the block just before the range. */
692 193141020 : error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
693 193142165 : if (error)
694 0 : goto out_error;
695 193142165 : if (!found_rec)
696 : return 0;
697 :
698 166174230 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
699 166174360 : if (error)
700 0 : goto out_error;
701 166174360 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
702 0 : xfs_btree_mark_sick(cur);
703 0 : error = -EFSCORRUPTED;
704 0 : goto out_error;
705 : }
706 :
/* A record from another domain, or one not ending at agbno, is no neighbour. */
707 166174360 : if (tmp.rc_domain != domain)
708 : return 0;
709 164563377 : if (xfs_refc_next(&tmp) != agbno)
710 : return 0;
711 : /* We have a left extent; retrieve (or invent) the next right one */
712 37700411 : *left = tmp;
713 :
/* Step the cursor to the record after left, if there is one. */
714 37700411 : error = xfs_btree_increment(cur, 0, &found_rec);
715 37700368 : if (error)
716 0 : goto out_error;
717 37700368 : if (found_rec) {
718 36344156 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
719 36344203 : if (error)
720 0 : goto out_error;
721 36344203 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
722 0 : xfs_btree_mark_sick(cur);
723 0 : error = -EFSCORRUPTED;
724 0 : goto out_error;
725 : }
726 :
/* The next record is in a different domain: treat it as absent. */
727 36344203 : if (tmp.rc_domain != domain)
728 220938 : goto not_found;
729 :
730 : /* if tmp starts at the start of our range, just use that */
731 36123265 : if (tmp.rc_startblock == agbno)
732 29030838 : *cleft = tmp;
733 : else {
734 : /*
735 : * There's a gap in the refcntbt at the start of the
736 : * range we're interested in (refcount == 1) so
737 : * synthesize the implied extent and pass it back.
738 : * We assume here that the agbno/aglen range was
739 : * passed in from a data fork extent mapping and
740 : * therefore is allocated to exactly one owner.
741 : */
742 7092427 : cleft->rc_startblock = agbno;
/* The gap ends at the next record or at the end of the range. */
743 7092427 : cleft->rc_blockcount = min(aglen,
744 : tmp.rc_startblock - agbno);
745 7092427 : cleft->rc_refcount = 1;
746 7092427 : cleft->rc_domain = domain;
747 : }
748 : } else {
/* Also reached by goto from the domain check in the branch above. */
749 1356212 : not_found:
750 : /*
751 : * No extents, so pretend that there's one covering the whole
752 : * range.
753 : */
754 1577150 : cleft->rc_startblock = agbno;
755 1577150 : cleft->rc_blockcount = aglen;
756 1577150 : cleft->rc_refcount = 1;
757 1577150 : cleft->rc_domain = domain;
758 : }
759 37700415 : trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
760 : left, cleft, agbno);
761 37700415 : return error;
762 :
763 0 : out_error:
764 0 : trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
765 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
766 0 : return error;
767 : }
768 :
769 : /*
770 : * Find the right extent and the one before it (cright). This function
771 : * assumes that we've already split any extents crossing agbno + aglen.
 *
 * On return, right is valid (rc_startblock != NULLAGBLOCK) only if a record
 * in the requested domain starts exactly at agbno + aglen.  In that case
 * cright is also filled in: either with the real record that ends at
 * agbno + aglen, or with a synthesized refcount == 1 record standing in for
 * the gap before right.  If no such right neighbour exists, both right and
 * cright keep NULLAGBLOCK as their start block and 0 is returned.
 *
 * Returns 0 on success or a negative errno, which is also traced.
772 : */
773 : STATIC int
774 193141336 : xfs_refcount_find_right_extents(
775 : struct xfs_btree_cur *cur,
776 : struct xfs_refcount_irec *right,
777 : struct xfs_refcount_irec *cright,
778 : enum xfs_refc_domain domain,
779 : xfs_agblock_t agbno,
780 : xfs_extlen_t aglen)
781 : {
782 193141336 : struct xfs_refcount_irec tmp;
783 193141336 : int error;
784 193141336 : int found_rec;
785 :
/* Start with both outputs marked invalid; see xfs_refc_valid(). */
786 193141336 : right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
/* Look for the first record at or after the end of the range. */
787 193141336 : error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
788 193141731 : if (error)
789 0 : goto out_error;
790 193141731 : if (!found_rec)
791 : return 0;
792 :
793 158523005 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
794 158523716 : if (error)
795 10 : goto out_error;
796 158523706 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
797 0 : xfs_btree_mark_sick(cur);
798 0 : error = -EFSCORRUPTED;
799 0 : goto out_error;
800 : }
801 :
/* A record from another domain, or one not starting at the range end, is no neighbour. */
802 158523706 : if (tmp.rc_domain != domain)
803 : return 0;
804 148810533 : if (tmp.rc_startblock != agbno + aglen)
805 : return 0;
806 : /* We have a right extent; retrieve (or invent) the next left one */
807 35341016 : *right = tmp;
808 :
/* Step the cursor to the record before right, if there is one. */
809 35341016 : error = xfs_btree_decrement(cur, 0, &found_rec);
810 35340941 : if (error)
811 0 : goto out_error;
812 35340941 : if (found_rec) {
813 35300056 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
814 35300039 : if (error)
815 0 : goto out_error;
816 35300039 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
817 0 : xfs_btree_mark_sick(cur);
818 0 : error = -EFSCORRUPTED;
819 0 : goto out_error;
820 : }
821 :
/* The previous record is in a different domain: treat it as absent. */
822 35300039 : if (tmp.rc_domain != domain)
823 861 : goto not_found;
824 :
825 : /* if tmp ends at the end of our range, just use that */
826 35299178 : if (xfs_refc_next(&tmp) == agbno + aglen)
827 30598089 : *cright = tmp;
828 : else {
829 : /*
830 : * There's a gap in the refcntbt at the end of the
831 : * range we're interested in (refcount == 1) so
832 : * create the implied extent and pass it back.
833 : * We assume here that the agbno/aglen range was
834 : * passed in from a data fork extent mapping and
835 : * therefore is allocated to exactly one owner.
836 : */
/* The gap starts at the end of tmp or at the start of the range. */
837 4701089 : cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
838 4701089 : cright->rc_blockcount = right->rc_startblock -
839 : cright->rc_startblock;
840 4701089 : cright->rc_refcount = 1;
841 4701089 : cright->rc_domain = domain;
842 : }
843 : } else {
/* Also reached by goto from the domain check in the branch above. */
844 40885 : not_found:
845 : /*
846 : * No extents, so pretend that there's one covering the whole
847 : * range.
848 : */
849 41746 : cright->rc_startblock = agbno;
850 41746 : cright->rc_blockcount = aglen;
851 41746 : cright->rc_refcount = 1;
852 41746 : cright->rc_domain = domain;
853 : }
854 35340924 : trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
855 : cright, right, agbno + aglen);
856 35340924 : return error;
857 :
858 10 : out_error:
859 20 : trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
860 10 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
861 10 : return error;
862 : }
863 :
864 : /* Is this extent valid? */
865 : static inline bool
866 : xfs_refc_valid(
867 : const struct xfs_refcount_irec *rc)
868 : {
869 646843630 : return rc->rc_startblock != NULLAGBLOCK;
870 : }
871 :
872 : static inline xfs_nlink_t
873 : xfs_refc_merge_refcount(
874 : const struct xfs_refcount_irec *irec,
875 : enum xfs_refc_adjust_op adjust)
876 : {
877 : /* Once a record hits MAXREFCOUNT, it is pinned there forever */
878 85534961 : if (irec->rc_refcount == MAXREFCOUNT)
879 : return MAXREFCOUNT;
880 85534931 : return irec->rc_refcount + adjust;
881 : }
882 :
883 : static inline bool
884 53498988 : xfs_refc_want_merge_center(
885 : const struct xfs_refcount_irec *left,
886 : const struct xfs_refcount_irec *cleft,
887 : const struct xfs_refcount_irec *cright,
888 : const struct xfs_refcount_irec *right,
889 : bool cleft_is_cright,
890 : enum xfs_refc_adjust_op adjust,
891 : unsigned long long *ulenp)
892 : {
893 53498988 : unsigned long long ulen = left->rc_blockcount;
894 53498988 : xfs_nlink_t new_refcount;
895 :
896 : /*
897 : * To merge with a center record, both shoulder records must be
898 : * adjacent to the record we want to adjust. This is only true if
899 : * find_left and find_right made all four records valid.
900 : */
901 53498988 : if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
902 19542167 : !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
903 : return false;
904 :
905 : /* There must only be one record for the entire range. */
906 19542167 : if (!cleft_is_cright)
907 : return false;
908 :
909 : /* The shoulder record refcounts must match the new refcount. */
910 18524647 : new_refcount = xfs_refc_merge_refcount(cleft, adjust);
911 18524647 : if (left->rc_refcount != new_refcount)
912 : return false;
913 3555281 : if (right->rc_refcount != new_refcount)
914 : return false;
915 :
916 : /*
917 : * The new record cannot exceed the max length. ulen is a ULL as the
918 : * individual record block counts can be up to (u32 - 1) in length
919 : * hence we need to catch u32 addition overflows here.
920 : */
921 2475445 : ulen += cleft->rc_blockcount + right->rc_blockcount;
922 2475445 : if (ulen >= MAXREFCEXTLEN)
923 : return false;
924 :
925 2475445 : *ulenp = ulen;
926 2475445 : return true;
927 : }
928 :
929 : static inline bool
930 51023439 : xfs_refc_want_merge_left(
931 : const struct xfs_refcount_irec *left,
932 : const struct xfs_refcount_irec *cleft,
933 : enum xfs_refc_adjust_op adjust)
934 : {
935 51023439 : unsigned long long ulen = left->rc_blockcount;
936 51023439 : xfs_nlink_t new_refcount;
937 :
938 : /*
939 : * For a left merge, the left shoulder record must be adjacent to the
940 : * start of the range. If this is true, find_left made left and cleft
941 : * contain valid contents.
942 : */
943 51023439 : if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
944 : return false;
945 :
946 : /* Left shoulder record refcount must match the new refcount. */
947 35224817 : new_refcount = xfs_refc_merge_refcount(cleft, adjust);
948 35224817 : if (left->rc_refcount != new_refcount)
949 : return false;
950 :
951 : /*
952 : * The new record cannot exceed the max length. ulen is a ULL as the
953 : * individual record block counts can be up to (u32 - 1) in length
954 : * hence we need to catch u32 addition overflows here.
955 : */
956 8865324 : ulen += cleft->rc_blockcount;
957 8865324 : if (ulen >= MAXREFCEXTLEN)
958 0 : return false;
959 :
960 : return true;
961 : }
962 :
963 : static inline bool
964 49943604 : xfs_refc_want_merge_right(
965 : const struct xfs_refcount_irec *cright,
966 : const struct xfs_refcount_irec *right,
967 : enum xfs_refc_adjust_op adjust)
968 : {
969 49943604 : unsigned long long ulen = right->rc_blockcount;
970 49943604 : xfs_nlink_t new_refcount;
971 :
972 : /*
973 : * For a right merge, the right shoulder record must be adjacent to the
974 : * end of the range. If this is true, find_right made cright and right
975 : * contain valid contents.
976 : */
977 49943604 : if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
978 : return false;
979 :
980 : /* Right shoulder record refcount must match the new refcount. */
981 31785497 : new_refcount = xfs_refc_merge_refcount(cright, adjust);
982 31785497 : if (right->rc_refcount != new_refcount)
983 : return false;
984 :
985 : /*
986 : * The new record cannot exceed the max length. ulen is a ULL as the
987 : * individual record block counts can be up to (u32 - 1) in length
988 : * hence we need to catch u32 addition overflows here.
989 : */
990 4393684 : ulen += cright->rc_blockcount;
991 4393684 : if (ulen >= MAXREFCEXTLEN)
992 0 : return false;
993 :
994 : return true;
995 : }
996 :
997 : /*
998 : * Try to merge with any extents on the boundaries of the adjustment range.
999 : */
1000 : STATIC int
1001 193141066 : xfs_refcount_merge_extents(
1002 : struct xfs_btree_cur *cur,
1003 : enum xfs_refc_domain domain,
1004 : xfs_agblock_t *agbno,
1005 : xfs_extlen_t *aglen,
1006 : enum xfs_refc_adjust_op adjust,
1007 : bool *shape_changed)
1008 : {
1009 193141066 : struct xfs_refcount_irec left = {0}, cleft = {0};
1010 193141066 : struct xfs_refcount_irec cright = {0}, right = {0};
1011 193141066 : int error;
1012 193141066 : unsigned long long ulen;
1013 193141066 : bool cequal;
1014 :
1015 193141066 : *shape_changed = false;
1016 : /*
1017 : * Find the extent just below agbno [left], just above agbno [cleft],
1018 : * just below (agbno + aglen) [cright], and just above (agbno + aglen)
1019 : * [right].
1020 : */
1021 193141066 : error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
1022 : *agbno, *aglen);
1023 193141588 : if (error)
1024 : return error;
1025 193141661 : error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
1026 : *agbno, *aglen);
1027 193141475 : if (error)
1028 : return error;
1029 :
1030 : /* No left or right extent to merge; exit. */
1031 193141465 : if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
1032 : return 0;
1033 :
1034 53498949 : cequal = (cleft.rc_startblock == cright.rc_startblock) &&
1035 18524647 : (cleft.rc_blockcount == cright.rc_blockcount);
1036 :
1037 : /* Try to merge left, cleft, and right. cleft must == cright. */
1038 53498949 : if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
1039 : adjust, &ulen)) {
1040 2475445 : *shape_changed = true;
1041 2475445 : return xfs_refcount_merge_center_extents(cur, &left, &cleft,
1042 : &right, ulen, aglen);
1043 : }
1044 :
1045 : /* Try to merge left and cleft. */
1046 51023327 : if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
1047 8865315 : *shape_changed = true;
1048 8865315 : error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
1049 : agbno, aglen);
1050 8865322 : if (error)
1051 : return error;
1052 :
1053 : /*
1054 : * If we just merged left + cleft and cleft == cright,
1055 : * we no longer have a cright to merge with right. We're done.
1056 : */
1057 8865322 : if (cequal)
1058 : return 0;
1059 : }
1060 :
1061 : /* Try to merge cright and right. */
1062 49943498 : if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
1063 4393684 : *shape_changed = true;
1064 4393684 : return xfs_refcount_merge_right_extent(cur, &right, &cright,
1065 : aglen);
1066 : }
1067 :
1068 : return 0;
1069 : }
1070 :
1071 : /*
1072 : * XXX: This is a pretty hand-wavy estimate. The penalty for guessing
1073 : * true incorrectly is a shutdown FS; the penalty for guessing false
1074 : * incorrectly is more transaction rolls than might be necessary.
1075 : * Be conservative here.
1076 : */
1077 : static bool
1078 200894891 : xfs_refcount_still_have_space(
1079 : struct xfs_btree_cur *cur)
1080 : {
1081 200894891 : unsigned long overhead;
1082 :
1083 : /*
1084 : * Worst case estimate: full splits of the free space and rmap btrees
1085 : * to handle each of the shape changes to the refcount btree.
1086 : */
1087 200894891 : overhead = xfs_allocfree_block_count(cur->bc_mp,
1088 : cur->bc_ag.refc.shape_changes);
1089 200893525 : overhead += cur->bc_mp->m_refc_maxlevels;
1090 200893525 : overhead *= cur->bc_mp->m_sb.sb_blocksize;
1091 :
1092 : /*
1093 : * Only allow 2 refcount extent updates per transaction if the
1094 : * refcount continue update "error" has been injected.
1095 : */
1096 230202426 : if (cur->bc_ag.refc.nr_ops > 2 &&
1097 29308903 : XFS_TEST_ERROR(false, cur->bc_mp,
1098 : XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
1099 : return false;
1100 :
1101 200893271 : if (cur->bc_ag.refc.nr_ops == 0)
1102 : return true;
1103 42110597 : else if (overhead > cur->bc_tp->t_log_res)
1104 : return false;
1105 42110597 : return cur->bc_tp->t_log_res - overhead >
1106 42110597 : cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
1107 : }
1108 :
1109 : /*
1110 : * Adjust the refcounts of middle extents. At this point we should have
1111 : * split extents that crossed the adjustment range; merged with adjacent
1112 : * extents; and updated agbno/aglen to reflect the merges. Therefore,
1113 : * all we have to do is update the extents inside [agbno, agbno + aglen].
1114 : */
/*
 * Parameters:
 *   cur   - refcount btree cursor; its refc.nr_ops counter is bumped for
 *           every record modification attempted here.
 *   agbno - in/out: start of the range still to be processed; advanced past
 *           each hole or record that has been handled.
 *   aglen - in/out: number of blocks still to be processed; decremented in
 *           step with *agbno.  May be left non-zero if
 *           xfs_refcount_still_have_space() says to stop early.
 *   adj   - amount added to each refcount in the range.
 *
 * Returns 0 on success (including an early stop) or a negative errno.
 */
1115 : STATIC int
1116 173763817 : xfs_refcount_adjust_extents(
1117 : struct xfs_btree_cur *cur,
1118 : xfs_agblock_t *agbno,
1119 : xfs_extlen_t *aglen,
1120 : enum xfs_refc_adjust_op adj)
1121 : {
1122 173763817 : struct xfs_refcount_irec ext, tmp;
1123 173763817 : int error;
1124 173763817 : int found_rec, found_tmp;
1125 173763817 : xfs_fsblock_t fsbno;
1126 :
1127 : /* Merging did all the work already. */
1128 173763817 : if (*aglen == 0)
1129 : return 0;
1130 :
1131 159961534 : error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
1132 : &found_rec);
1133 159962932 : if (error)
1134 0 : goto out_error;
1135 :
1136 285692889 : while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
1137 194140843 : error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1138 194142127 : if (error)
1139 0 : goto out_error;
	/*
	 * No usable shared-domain record at the cursor: synthesize an empty
	 * record starting at sb_agblocks so the hole logic below covers the
	 * rest of the range.
	 */
1140 194142127 : if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
1141 18648724 : ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
1142 18648724 : ext.rc_blockcount = 0;
1143 18648724 : ext.rc_refcount = 0;
1144 18648724 : ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
1145 : }
1146 :
1147 : /*
1148 : * Deal with a hole in the refcount tree; if a file maps to
1149 : * these blocks and there's no refcountbt record, pretend that
1150 : * there is one with refcount == 1.
1151 : */
1152 194142127 : if (ext.rc_startblock != *agbno) {
1153 75163824 : tmp.rc_startblock = *agbno;
1154 75163824 : tmp.rc_blockcount = min(*aglen,
1155 : ext.rc_startblock - *agbno);
1156 75163824 : tmp.rc_refcount = 1 + adj;
1157 75163824 : tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;
1158 :
1159 75163824 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1160 75163824 : cur->bc_ag.pag->pag_agno, &tmp);
1161 :
1162 : /*
1163 : * Either cover the hole (increment) or
1164 : * delete the range (decrement).
1165 : */
1166 75163551 : cur->bc_ag.refc.nr_ops++;
1167 75163551 : if (tmp.rc_refcount) {
1168 29211259 : error = xfs_refcount_insert(cur, &tmp,
1169 : &found_tmp);
1170 29211255 : if (error)
1171 44 : goto out_error;
1172 29211211 : if (XFS_IS_CORRUPT(cur->bc_mp,
1173 : found_tmp != 1)) {
1174 0 : xfs_btree_mark_sick(cur);
1175 0 : error = -EFSCORRUPTED;
1176 0 : goto out_error;
1177 : }
1178 : } else {
	/* Implied refcount of 1 dropped to 0: queue the blocks for freeing. */
1179 45952292 : fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
1180 : cur->bc_ag.pag->pag_agno,
1181 : tmp.rc_startblock);
1182 45952292 : error = xfs_free_extent_later(cur->bc_tp, fsbno,
1183 45952292 : tmp.rc_blockcount, NULL,
1184 : XFS_AG_RESV_NONE);
1185 45951658 : if (error)
1186 0 : goto out_error;
1187 : }
1188 :
1189 75162869 : (*agbno) += tmp.rc_blockcount;
1190 75162869 : (*aglen) -= tmp.rc_blockcount;
1191 :
1192 : /* Stop if there's nothing left to modify */
1193 75162869 : if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
1194 : break;
1195 :
1196 : /* Move the cursor to the start of ext. */
1197 6751680 : error = xfs_refcount_lookup_ge(cur,
1198 : XFS_REFC_DOMAIN_SHARED, *agbno,
1199 : &found_rec);
1200 6751681 : if (error)
1201 0 : goto out_error;
1202 : }
1203 :
1204 : /*
1205 : * A previous step trimmed agbno/aglen such that the end of the
1206 : * range would not be in the middle of the record. If this is
1207 : * no longer the case, something is seriously wrong with the
1208 : * btree. Make sure we never feed the synthesized record into
1209 : * the processing loop below.
1210 : */
1211 125729984 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
1212 125729984 : XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
1213 0 : xfs_btree_mark_sick(cur);
1214 0 : error = -EFSCORRUPTED;
1215 0 : goto out_error;
1216 : }
1217 :
1218 : /*
1219 : * Adjust the reference count and either update the tree
1220 : * (incr) or free the blocks (decr).
1221 : */
	/* Records already at MAXREFCOUNT are left unmodified and stepped over. */
1222 125729984 : if (ext.rc_refcount == MAXREFCOUNT)
1223 48 : goto skip;
1224 125729936 : ext.rc_refcount += adj;
1225 125729936 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1226 125729936 : cur->bc_ag.pag->pag_agno, &ext);
1227 125729886 : cur->bc_ag.refc.nr_ops++;
1228 125729886 : if (ext.rc_refcount > 1) {
1229 77880625 : error = xfs_refcount_update(cur, &ext);
1230 77880549 : if (error)
1231 0 : goto out_error;
1232 47849261 : } else if (ext.rc_refcount == 1) {
	/*
	 * New refcount is 1: the record is deleted rather than stored.  This
	 * path jumps to advloop and so does not call xfs_btree_increment().
	 * NOTE(review): presumably the delete leaves the cursor on the
	 * following record -- confirm against xfs_refcount_delete().
	 */
1233 47849261 : error = xfs_refcount_delete(cur, &found_rec);
1234 47849261 : if (error)
1235 0 : goto out_error;
1236 47849261 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1237 0 : xfs_btree_mark_sick(cur);
1238 0 : error = -EFSCORRUPTED;
1239 0 : goto out_error;
1240 : }
1241 47849261 : goto advloop;
1242 : } else {
	/* New refcount is 0: queue the record's blocks for freeing. */
1243 0 : fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
1244 : cur->bc_ag.pag->pag_agno,
1245 : ext.rc_startblock);
1246 0 : error = xfs_free_extent_later(cur->bc_tp, fsbno,
1247 0 : ext.rc_blockcount, NULL,
1248 : XFS_AG_RESV_NONE);
1249 0 : if (error)
1250 0 : goto out_error;
1251 : }
1252 :
1253 0 : skip:
1254 77880597 : error = xfs_btree_increment(cur, 0, &found_rec);
1255 77880696 : if (error)
1256 0 : goto out_error;
1257 :
1258 77880696 : advloop:
	/* Consume this record's blocks from the remaining range. */
1259 125729957 : (*agbno) += ext.rc_blockcount;
1260 125729957 : (*aglen) -= ext.rc_blockcount;
1261 : }
1262 :
	/* error is 0 here: every failing call above jumps to out_error. */
1263 : return error;
1264 44 : out_error:
1265 88 : trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1266 44 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1267 44 : return error;
1268 : }
1269 :
1270 : /* Adjust the reference count of a range of AG blocks. */
1271 : STATIC int
1272 173764730 : xfs_refcount_adjust(
1273 : struct xfs_btree_cur *cur,
1274 : xfs_agblock_t *agbno,
1275 : xfs_extlen_t *aglen,
1276 : enum xfs_refc_adjust_op adj)
1277 : {
1278 173764730 : bool shape_changed;
1279 173764730 : int shape_changes = 0;
1280 173764730 : int error;
1281 :
1282 173764730 : if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
1283 78043946 : trace_xfs_refcount_increase(cur->bc_mp,
1284 78043946 : cur->bc_ag.pag->pag_agno, *agbno, *aglen);
1285 : else
1286 95720784 : trace_xfs_refcount_decrease(cur->bc_mp,
1287 95720784 : cur->bc_ag.pag->pag_agno, *agbno, *aglen);
1288 :
1289 : /*
1290 : * Ensure that no rcextents cross the boundary of the adjustment range.
1291 : */
1292 173763903 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
1293 : *agbno, &shape_changed);
1294 173764316 : if (error)
1295 207 : goto out_error;
1296 173764109 : if (shape_changed)
1297 16180405 : shape_changes++;
1298 :
1299 173764109 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
1300 173764109 : *agbno + *aglen, &shape_changed);
1301 173764959 : if (error)
1302 25 : goto out_error;
1303 173764934 : if (shape_changed)
1304 16154429 : shape_changes++;
1305 :
1306 : /*
1307 : * Try to merge with the left or right extents of the range.
1308 : */
1309 173764934 : error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
1310 : agbno, aglen, adj, &shape_changed);
1311 173764252 : if (error)
1312 10 : goto out_error;
1313 173764242 : if (shape_changed)
1314 14116580 : shape_changes++;
1315 173764242 : if (shape_changes)
1316 35416074 : cur->bc_ag.refc.shape_changes++;
1317 :
1318 : /* Now that we've taken care of the ends, adjust the middle extents */
1319 173764242 : error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
1320 173762542 : if (error)
1321 44 : goto out_error;
1322 :
1323 : return 0;
1324 :
1325 286 : out_error:
1326 572 : trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1327 286 : error, _RET_IP_);
1328 286 : return error;
1329 : }
1330 :
1331 : /* Clean up after calling xfs_refcount_finish_one. */
1332 : void
1333 191113107 : xfs_refcount_finish_one_cleanup(
1334 : struct xfs_trans *tp,
1335 : struct xfs_btree_cur *rcur,
1336 : int error)
1337 : {
1338 191596450 : struct xfs_buf *agbp;
1339 :
1340 191113107 : if (rcur == NULL)
1341 : return;
1342 191596140 : agbp = rcur->bc_ag.agbp;
1343 191112797 : xfs_btree_del_cursor(rcur, error);
1344 191114419 : if (error)
1345 1693 : xfs_trans_brelse(tp, agbp);
1346 : }
1347 :
1348 : /*
1349 : * Set up a continuation a deferred refcount operation by updating the intent.
1350 : * Checks to make sure we're not going to run off the end of the AG.
1351 : */
1352 : static inline int
1353 1406 : xfs_refcount_continue_op(
1354 : struct xfs_btree_cur *cur,
1355 : struct xfs_refcount_intent *ri,
1356 : xfs_agblock_t new_agbno)
1357 : {
1358 1406 : struct xfs_mount *mp = cur->bc_mp;
1359 1406 : struct xfs_perag *pag = cur->bc_ag.pag;
1360 :
1361 1406 : if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
1362 : ri->ri_blockcount))) {
1363 0 : xfs_btree_mark_sick(cur);
1364 0 : return -EFSCORRUPTED;
1365 : }
1366 :
1367 1406 : ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
1368 :
1369 1406 : ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
1370 1406 : ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
1371 :
1372 : return 0;
1373 : }
1374 :
1375 : /*
1376 : * Process one of the deferred refcount operations. We pass back the
1377 : * btree cursor to maintain our lock on the btree between calls.
1378 : * This saves time and eliminates a buffer deadlock between the
1379 : * superblock and the AGF because we'll always grab them in the same
1380 : * order.
1381 : */
1382 : int
1383 193135699 : xfs_refcount_finish_one(
1384 : struct xfs_trans *tp,
1385 : struct xfs_refcount_intent *ri,
1386 : struct xfs_btree_cur **pcur)
1387 : {
1388 193135699 : struct xfs_mount *mp = tp->t_mountp;
1389 193135699 : struct xfs_btree_cur *rcur;
1390 193135699 : struct xfs_buf *agbp = NULL;
1391 193135699 : int error = 0;
1392 193135699 : xfs_agblock_t bno;
1393 193135699 : unsigned long nr_ops = 0;
1394 193135699 : int shape_changes = 0;
1395 :
1396 193135699 : bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);
1397 :
1398 193130397 : trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
1399 193130550 : ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
1400 : ri->ri_blockcount);
1401 :
1402 193131988 : if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
1403 : return -EIO;
1404 :
1405 : /*
1406 : * If we haven't gotten a cursor or the cursor AG doesn't match
1407 : * the startblock, get one now.
1408 : */
1409 193136265 : rcur = *pcur;
1410 193136265 : if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
1411 483343 : nr_ops = rcur->bc_ag.refc.nr_ops;
1412 483343 : shape_changes = rcur->bc_ag.refc.shape_changes;
1413 483343 : xfs_refcount_finish_one_cleanup(tp, rcur, 0);
1414 483343 : rcur = NULL;
1415 483343 : *pcur = NULL;
1416 : }
1417 193136265 : if (rcur == NULL) {
1418 191591369 : error = xfs_alloc_read_agf(ri->ri_pag, tp,
1419 : XFS_ALLOC_FLAG_FREEING, &agbp);
1420 191595599 : if (error)
1421 : return error;
1422 :
1423 191595398 : rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag);
1424 191596208 : rcur->bc_ag.refc.nr_ops = nr_ops;
1425 191596208 : rcur->bc_ag.refc.shape_changes = shape_changes;
1426 : }
1427 193141104 : *pcur = rcur;
1428 :
1429 193141104 : switch (ri->ri_type) {
1430 78043963 : case XFS_REFCOUNT_INCREASE:
1431 78043963 : error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
1432 : XFS_REFCOUNT_ADJUST_INCREASE);
1433 78043953 : if (error)
1434 : return error;
1435 78043840 : if (ri->ri_blockcount > 0)
1436 126 : error = xfs_refcount_continue_op(rcur, ri, bno);
1437 : break;
1438 95721052 : case XFS_REFCOUNT_DECREASE:
1439 95721052 : error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
1440 : XFS_REFCOUNT_ADJUST_DECREASE);
1441 95719061 : if (error)
1442 : return error;
1443 95718888 : if (ri->ri_blockcount > 0)
1444 1280 : error = xfs_refcount_continue_op(rcur, ri, bno);
1445 : break;
1446 3784496 : case XFS_REFCOUNT_ALLOC_COW:
1447 3784496 : error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
1448 3784416 : if (error)
1449 : return error;
1450 3784415 : ri->ri_blockcount = 0;
1451 3784415 : break;
1452 15591593 : case XFS_REFCOUNT_FREE_COW:
1453 15591593 : error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
1454 15592867 : if (error)
1455 : return error;
1456 15592867 : ri->ri_blockcount = 0;
1457 15592867 : break;
1458 0 : default:
1459 0 : ASSERT(0);
1460 0 : return -EFSCORRUPTED;
1461 : }
1462 193140010 : if (!error && ri->ri_blockcount > 0)
1463 1406 : trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno,
1464 1406 : ri->ri_type, bno, ri->ri_blockcount);
1465 : return error;
1466 : }
1467 :
1468 : /*
1469 : * Record a refcount intent for later processing.
1470 : */
1471 : static void
1472 193135469 : __xfs_refcount_add(
1473 : struct xfs_trans *tp,
1474 : enum xfs_refcount_intent_type type,
1475 : xfs_fsblock_t startblock,
1476 : xfs_extlen_t blockcount)
1477 : {
1478 193135469 : struct xfs_refcount_intent *ri;
1479 :
1480 579399387 : trace_xfs_refcount_defer(tp->t_mountp,
1481 193131959 : XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
1482 193135469 : type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
1483 : blockcount);
1484 :
1485 193131753 : ri = kmem_cache_alloc(xfs_refcount_intent_cache,
1486 : GFP_NOFS | __GFP_NOFAIL);
1487 193138831 : INIT_LIST_HEAD(&ri->ri_list);
1488 193138831 : ri->ri_type = type;
1489 193138831 : ri->ri_startblock = startblock;
1490 193138831 : ri->ri_blockcount = blockcount;
1491 :
1492 193138831 : xfs_refcount_update_get_group(tp->t_mountp, ri);
1493 193140084 : xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
1494 193140547 : }
1495 :
1496 : /*
1497 : * Increase the reference count of the blocks backing a file's extent.
1498 : */
1499 : void
1500 78043709 : xfs_refcount_increase_extent(
1501 : struct xfs_trans *tp,
1502 : struct xfs_bmbt_irec *PREV)
1503 : {
1504 78043709 : if (!xfs_has_reflink(tp->t_mountp))
1505 : return;
1506 :
1507 78043700 : __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
1508 78043700 : PREV->br_blockcount);
1509 : }
1510 :
1511 : /*
1512 : * Decrease the reference count of the blocks backing a file's extent.
1513 : */
1514 : void
1515 95717944 : xfs_refcount_decrease_extent(
1516 : struct xfs_trans *tp,
1517 : struct xfs_bmbt_irec *PREV)
1518 : {
1519 95717944 : if (!xfs_has_reflink(tp->t_mountp))
1520 : return;
1521 :
1522 95717650 : __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
1523 95717650 : PREV->br_blockcount);
1524 : }
1525 :
1526 : /*
1527 : * Given an AG extent, find the lowest-numbered run of shared blocks
1528 : * within that range and return the range in fbno/flen. If
1529 : * find_end_of_shared is set, return the longest contiguous extent of
1530 : * shared blocks; if not, just return the first extent we find. If no
1531 : * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
1532 : * and 0, respectively.
1533 : */
/*
 * Parameters:
 *   cur   - refcount btree cursor; repositioned by the lookups below.
 *   agbno - first AG block of the range to search.
 *   aglen - length of the range to search, in blocks.
 *   fbno  - out: start of the shared run found, or NULLAGBLOCK if none.
 *   flen  - out: length of the shared run found, or 0 if none.  The result
 *           is always clamped to [agbno, agbno + aglen).
 *   find_end_of_shared - if true, keep extending the run across records
 *           that start exactly where the previous one ended.
 *
 * Returns 0 (whether or not anything was found) or a negative errno.
 */
1534 : int
1535 1036105509 : xfs_refcount_find_shared(
1536 : struct xfs_btree_cur *cur,
1537 : xfs_agblock_t agbno,
1538 : xfs_extlen_t aglen,
1539 : xfs_agblock_t *fbno,
1540 : xfs_extlen_t *flen,
1541 : bool find_end_of_shared)
1542 : {
1543 1036105509 : struct xfs_refcount_irec tmp;
1544 1036105509 : int i;
1545 1036105509 : int have;
1546 1036105509 : int error;
1547 :
1548 1036105509 : trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1549 : agbno, aglen);
1550 :
1551 : /* By default, skip the whole range */
1552 1033873262 : *fbno = NULLAGBLOCK;
1553 1033873262 : *flen = 0;
1554 :
1555 : /* Try to find a refcount extent that crosses the start */
1556 1033873262 : error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
1557 : &have);
1558 1036529979 : if (error)
1559 60 : goto out_error;
1560 1036529919 : if (!have) {
1561 : /* No left extent, look at the next one */
1562 109307436 : error = xfs_btree_increment(cur, 0, &have);
1563 108863528 : if (error)
1564 0 : goto out_error;
1565 108863528 : if (!have)
1566 88277545 : goto done;
1567 : }
1568 947808468 : error = xfs_refcount_get_rec(cur, &tmp, &i);
1569 946884032 : if (error)
1570 0 : goto out_error;
1571 946884032 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1572 0 : xfs_btree_mark_sick(cur);
1573 0 : error = -EFSCORRUPTED;
1574 0 : goto out_error;
1575 : }
	/* A record from another domain means there is no shared record here. */
1576 946884032 : if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
1577 2117274 : goto done;
1578 :
1579 : /* If the extent ends before the start, look at the next one */
1580 944766758 : if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
1581 741345191 : error = xfs_btree_increment(cur, 0, &have);
1582 741600507 : if (error)
1583 0 : goto out_error;
1584 741600507 : if (!have)
1585 21481584 : goto done;
1586 720118923 : error = xfs_refcount_get_rec(cur, &tmp, &i);
1587 719913642 : if (error)
1588 0 : goto out_error;
1589 719913642 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1590 0 : xfs_btree_mark_sick(cur);
1591 0 : error = -EFSCORRUPTED;
1592 0 : goto out_error;
1593 : }
1594 719913642 : if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
1595 25373679 : goto done;
1596 : }
1597 :
1598 : /* If the extent starts after the range we want, bail out */
1599 897961530 : if (tmp.rc_startblock >= agbno + aglen)
1600 709954232 : goto done;
1601 :
1602 : /* We found the start of a shared extent! */
	/* Trim off the part of the record that lies before agbno. */
1603 188007298 : if (tmp.rc_startblock < agbno) {
1604 3057232 : tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
1605 3057232 : tmp.rc_startblock = agbno;
1606 : }
1607 :
1608 188007298 : *fbno = tmp.rc_startblock;
	/* Clamp the length so the result does not run past agbno + aglen. */
1609 188007298 : *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
1610 188007298 : if (!find_end_of_shared)
1611 185828708 : goto done;
1612 :
1613 : /* Otherwise, find the end of this shared extent */
1614 3200890 : while (*fbno + *flen < agbno + aglen) {
1615 1142369 : error = xfs_btree_increment(cur, 0, &have);
1616 1102760 : if (error)
1617 0 : goto out_error;
1618 1102760 : if (!have)
1619 : break;
1620 1096729 : error = xfs_refcount_get_rec(cur, &tmp, &i);
1621 1096729 : if (error)
1622 0 : goto out_error;
1623 1096729 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1624 0 : xfs_btree_mark_sick(cur);
1625 0 : error = -EFSCORRUPTED;
1626 0 : goto out_error;
1627 : }
	/*
	 * Stop at the first record that is not shared-domain, starts at or
	 * beyond the end of the range, or does not start exactly where the
	 * run found so far ends.
	 */
1628 1096729 : if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
1629 1086212 : tmp.rc_startblock >= agbno + aglen ||
1630 1031884 : tmp.rc_startblock != *fbno + *flen)
1631 : break;
1632 1022300 : *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
1633 : }
1634 :
1635 2138981 : done:
1636 1035172003 : trace_xfs_refcount_find_shared_result(cur->bc_mp,
1637 1035172003 : cur->bc_ag.pag->pag_agno, *fbno, *flen);
1638 :
	/* The success path falls through to here too, with error == 0. */
1639 1034577887 : out_error:
1640 1034577887 : if (error)
1641 120 : trace_xfs_refcount_find_shared_error(cur->bc_mp,
1642 60 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1643 1034577887 : return error;
1644 : }
1645 :
1646 : /*
1647 : * Recovering CoW Blocks After a Crash
1648 : *
1649 : * Due to the way that the copy on write mechanism works, there's a window of
1650 : * opportunity in which we can lose track of allocated blocks during a crash.
1651 : * Because CoW uses delayed allocation in the in-core CoW fork, writeback
1652 : * causes blocks to be allocated and stored in the CoW fork. The blocks are
1653 : * no longer in the free space btree but are not otherwise recorded anywhere
1654 : * until the write completes and the blocks are mapped into the file. A crash
1655 : * in between allocation and remapping results in the replacement blocks being
1656 : * lost. This situation is exacerbated by the CoW extent size hint because
1657 : * allocations can hang around for long time.
1658 : *
1659 : * However, there is a place where we can record these allocations before they
1660 : * become mappings -- the reference count btree. The btree does not record
1661 : * extents with refcount == 1, so we can record allocations with a refcount of
1662 : * 1. Blocks being used for CoW writeout cannot be shared, so there should be
1663 : * no conflict with shared block records. These mappings should be created
1664 : * when we allocate blocks to the CoW fork and deleted when they're removed
1665 : * from the CoW fork.
1666 : *
1667 : * Minor nit: records for in-progress CoW allocations and records for shared
1668 : * extents must never be merged, to preserve the property that (except for CoW
1669 : * allocations) there are no refcount btree entries with refcount == 1. The
1670 : * only time this could potentially happen is when unsharing a block that's
1671 : * adjacent to CoW allocations, so we must be careful to avoid this.
1672 : *
1673 : * At mount time we recover lost CoW allocations by searching the refcount
1674 : * btree for these refcount == 1 mappings. These represent CoW allocations
1675 : * that were in progress at the time the filesystem went down, so we can free
1676 : * them to get the space back.
1677 : *
1678 : * This mechanism is superior to creating EFIs for unmapped CoW extents for
1679 : * several reasons -- first, EFIs pin the tail of the log and would have to be
1680 : * periodically relogged to avoid filling up the log. Second, CoW completions
1681 : * will have to file an EFD and create new EFIs for whatever remains in the
1682 : * CoW fork; this partially takes care of (1) but extent-size reservations
1683 : * will have to periodically relog even if there's no writeout in progress.
1684 : * This can happen if the CoW extent size hint is set, which you really want.
1685 : * Third, EFIs cannot currently be automatically relogged into newer
1686 : * transactions to advance the log tail. Fourth, stuffing the log full of
1687 : * EFIs places an upper bound on the number of CoW allocations that can be
1688 : * held filesystem-wide at any given time. Recording them in the refcount
1689 : * btree doesn't require us to maintain any state in memory and doesn't pin
1690 : * the log.
1691 : */
1692 : /*
1693 : * Adjust the refcounts of CoW allocations. These allocations are "magic"
1694 : * in that they're not referenced anywhere else in the filesystem, so we
1695 : * stash them in the refcount btree with a refcount of 1 until either file
1696 : * remapping (or CoW cancellation) happens.
1697 : */
/*
 * Parameters:
 *   cur   - refcount btree cursor.
 *   agbno - first AG block of the CoW staging extent.
 *   aglen - length of the extent in blocks; 0 is a no-op.
 *   adj   - XFS_REFCOUNT_ADJUST_COW_ALLOC inserts a refcount == 1 record for
 *           the range in the CoW domain; XFS_REFCOUNT_ADJUST_COW_FREE deletes
 *           the record, which must match the range exactly.
 *
 * Returns 0 on success or a negative errno.  -EFSCORRUPTED is returned, after
 * marking the btree sick, when the btree contents do not match expectations.
 */
1698 : STATIC int
1699 19376094 : xfs_refcount_adjust_cow_extents(
1700 : struct xfs_btree_cur *cur,
1701 : xfs_agblock_t agbno,
1702 : xfs_extlen_t aglen,
1703 : enum xfs_refc_adjust_op adj)
1704 : {
1705 19376094 : struct xfs_refcount_irec ext, tmp;
1706 19376094 : int error;
1707 19376094 : int found_rec, found_tmp;
1708 :
1709 19376094 : if (aglen == 0)
1710 : return 0;
1711 :
1712 : /* Find any overlapping refcount records */
1713 17766992 : error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
1714 : &found_rec);
1715 17768739 : if (error)
1716 0 : goto out_error;
1717 17768739 : error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1718 17768002 : if (error)
1719 0 : goto out_error;
1720 17768002 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
1721 : ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
1722 0 : xfs_btree_mark_sick(cur);
1723 0 : error = -EFSCORRUPTED;
1724 0 : goto out_error;
1725 : }
	/*
	 * Nothing at or after agbno: synthesize an empty record starting at
	 * sb_agblocks so the range checks below still have something to
	 * compare against.
	 */
1726 17768002 : if (!found_rec) {
1727 1151285 : ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
1728 1151285 : ext.rc_blockcount = 0;
1729 1151285 : ext.rc_refcount = 0;
1730 1151285 : ext.rc_domain = XFS_REFC_DOMAIN_COW;
1731 : }
1732 :
1733 17768002 : switch (adj) {
1734 2175578 : case XFS_REFCOUNT_ADJUST_COW_ALLOC:
1735 : /* Adding a CoW reservation, there should be nothing here. */
1736 2175578 : if (XFS_IS_CORRUPT(cur->bc_mp,
1737 : agbno + aglen > ext.rc_startblock)) {
1738 0 : xfs_btree_mark_sick(cur);
1739 0 : error = -EFSCORRUPTED;
1740 0 : goto out_error;
1741 : }
1742 :
1743 2175578 : tmp.rc_startblock = agbno;
1744 2175578 : tmp.rc_blockcount = aglen;
1745 2175578 : tmp.rc_refcount = 1;
1746 2175578 : tmp.rc_domain = XFS_REFC_DOMAIN_COW;
1747 :
1748 2175578 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1749 2175578 : cur->bc_ag.pag->pag_agno, &tmp);
1750 :
1751 2175499 : error = xfs_refcount_insert(cur, &tmp,
1752 : &found_tmp);
1753 2175446 : if (error)
1754 0 : goto out_error;
1755 2175446 : if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
1756 0 : xfs_btree_mark_sick(cur);
1757 0 : error = -EFSCORRUPTED;
1758 0 : goto out_error;
1759 : }
1760 : break;
1761 15592424 : case XFS_REFCOUNT_ADJUST_COW_FREE:
1762 : /* Removing a CoW reservation, there should be one extent. */
1763 15592424 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
1764 0 : xfs_btree_mark_sick(cur);
1765 0 : error = -EFSCORRUPTED;
1766 0 : goto out_error;
1767 : }
1768 15592424 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
1769 0 : xfs_btree_mark_sick(cur);
1770 0 : error = -EFSCORRUPTED;
1771 0 : goto out_error;
1772 : }
1773 15592424 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
1774 0 : xfs_btree_mark_sick(cur);
1775 0 : error = -EFSCORRUPTED;
1776 0 : goto out_error;
1777 : }
1778 :
	/* The zeroed refcount is only reported to the tracepoint; the record is then deleted. */
1779 15592424 : ext.rc_refcount = 0;
1780 15592424 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1781 15592424 : cur->bc_ag.pag->pag_agno, &ext);
1782 15591369 : error = xfs_refcount_delete(cur, &found_rec);
1783 15593090 : if (error)
1784 0 : goto out_error;
1785 15593090 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1786 0 : xfs_btree_mark_sick(cur);
1787 0 : error = -EFSCORRUPTED;
1788 0 : goto out_error;
1789 : }
1790 : break;
	/*
	 * Any other adj value trips the assertion.  error is still 0 from the
	 * calls above, so if ASSERT() does not stop execution this returns 0.
	 */
1791 0 : default:
1792 0 : ASSERT(0);
1793 : }
1794 :
1795 : return error;
1796 0 : out_error:
1797 0 : trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1798 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1799 0 : return error;
1800 : }
1801 :
1802 : /*
1803 : * Add or remove refcount btree entries for CoW reservations.
1804 : */
1805 : STATIC int
1806 19373621 : xfs_refcount_adjust_cow(
1807 : struct xfs_btree_cur *cur,
1808 : xfs_agblock_t agbno,
1809 : xfs_extlen_t aglen,
1810 : enum xfs_refc_adjust_op adj)
1811 : {
1812 19373621 : bool shape_changed;
1813 19373621 : int error;
1814 :
1815 : /*
1816 : * Ensure that no rcextents cross the boundary of the adjustment range.
1817 : */
1818 19373621 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
1819 : agbno, &shape_changed);
1820 19373870 : if (error)
1821 1 : goto out_error;
1822 :
1823 19373869 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
1824 : agbno + aglen, &shape_changed);
1825 19377461 : if (error)
1826 0 : goto out_error;
1827 :
1828 : /*
1829 : * Try to merge with the left or right extents of the range.
1830 : */
1831 19377461 : error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
1832 : &aglen, adj, &shape_changed);
1833 19376438 : if (error)
1834 0 : goto out_error;
1835 :
1836 : /* Now that we've taken care of the ends, adjust the middle extents */
1837 19376438 : error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
1838 19377267 : if (error)
1839 0 : goto out_error;
1840 :
1841 : return 0;
1842 :
1843 1 : out_error:
1844 2 : trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1845 1 : error, _RET_IP_);
1846 1 : return error;
1847 : }
1848 :
1849 : /*
1850 : * Record a CoW allocation in the refcount btree.
1851 : */
1852 : STATIC int
1853 3784399 : __xfs_refcount_cow_alloc(
1854 : struct xfs_btree_cur *rcur,
1855 : xfs_agblock_t agbno,
1856 : xfs_extlen_t aglen)
1857 : {
1858 3784399 : trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1859 : agbno, aglen);
1860 :
1861 : /* Add refcount btree reservation */
1862 3784228 : return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1863 : XFS_REFCOUNT_ADJUST_COW_ALLOC);
1864 : }
1865 :
1866 : /*
1867 : * Remove a CoW allocation from the refcount btree.
1868 : */
1869 : STATIC int
1870 15590183 : __xfs_refcount_cow_free(
1871 : struct xfs_btree_cur *rcur,
1872 : xfs_agblock_t agbno,
1873 : xfs_extlen_t aglen)
1874 : {
1875 15590183 : trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1876 : agbno, aglen);
1877 :
1878 : /* Remove refcount btree reservation */
1879 15588479 : return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1880 : XFS_REFCOUNT_ADJUST_COW_FREE);
1881 : }
1882 :
1883 : /* Record a CoW staging extent in the refcount btree. */
1884 : void
1885 3784491 : xfs_refcount_alloc_cow_extent(
1886 : struct xfs_trans *tp,
1887 : xfs_fsblock_t fsb,
1888 : xfs_extlen_t len)
1889 : {
1890 3784491 : struct xfs_mount *mp = tp->t_mountp;
1891 :
1892 3784491 : if (!xfs_has_reflink(mp))
1893 : return;
1894 :
1895 3784477 : __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
1896 :
1897 : /* Add rmap entry */
1898 3784524 : xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1899 3784524 : XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1900 : }
1901 :
1902 : /* Forget a CoW staging event in the refcount btree. */
1903 : void
1904 15591142 : xfs_refcount_free_cow_extent(
1905 : struct xfs_trans *tp,
1906 : xfs_fsblock_t fsb,
1907 : xfs_extlen_t len)
1908 : {
1909 15591142 : struct xfs_mount *mp = tp->t_mountp;
1910 :
1911 15591142 : if (!xfs_has_reflink(mp))
1912 : return;
1913 :
1914 : /* Remove rmap entry */
1915 15590031 : xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1916 15590983 : XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1917 15590723 : __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
1918 : }
1919 :
/*
 * One leftover CoW staging extent found while scanning the refcount btree.
 * xfs_refcount_recover_extent() allocates one of these per record and
 * xfs_refcount_recover_cow_leftovers() frees it once the extent is dealt with.
 */
struct xfs_refcount_recovery {
	/* Links this entry into the caller's list of leftover extents. */
	struct list_head		rr_list;
	/* In-core copy of the refcount btree record that was found. */
	struct xfs_refcount_irec	rr_rrec;
};
1924 :
1925 : /* Stuff an extent on the recovery list. */
1926 : STATIC int
1927 315192 : xfs_refcount_recover_extent(
1928 : struct xfs_btree_cur *cur,
1929 : const union xfs_btree_rec *rec,
1930 : void *priv)
1931 : {
1932 315192 : struct list_head *debris = priv;
1933 315192 : struct xfs_refcount_recovery *rr;
1934 :
1935 315192 : if (XFS_IS_CORRUPT(cur->bc_mp,
1936 : be32_to_cpu(rec->refc.rc_refcount) != 1)) {
1937 0 : xfs_btree_mark_sick(cur);
1938 0 : return -EFSCORRUPTED;
1939 : }
1940 :
1941 315192 : rr = kmalloc(sizeof(struct xfs_refcount_recovery),
1942 : GFP_KERNEL | __GFP_NOFAIL);
1943 315192 : INIT_LIST_HEAD(&rr->rr_list);
1944 315192 : xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
1945 :
1946 315192 : if (xfs_refcount_check_irec(cur, &rr->rr_rrec) != NULL ||
1947 315192 : XFS_IS_CORRUPT(cur->bc_mp,
1948 : rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
1949 0 : xfs_btree_mark_sick(cur);
1950 0 : kfree(rr);
1951 0 : return -EFSCORRUPTED;
1952 : }
1953 :
1954 315192 : list_add_tail(&rr->rr_list, debris);
1955 315192 : return 0;
1956 : }
1957 :
1958 : /* Find and remove leftover CoW reservations. */
1959 : int
1960 52422 : xfs_refcount_recover_cow_leftovers(
1961 : struct xfs_mount *mp,
1962 : struct xfs_perag *pag)
1963 : {
1964 52422 : struct xfs_trans *tp;
1965 52422 : struct xfs_btree_cur *cur;
1966 52422 : struct xfs_buf *agbp;
1967 52422 : struct xfs_refcount_recovery *rr, *n;
1968 52422 : struct list_head debris;
1969 52422 : union xfs_btree_irec low = {
1970 : .rc.rc_domain = XFS_REFC_DOMAIN_COW,
1971 : };
1972 52422 : union xfs_btree_irec high = {
1973 : .rc.rc_domain = XFS_REFC_DOMAIN_COW,
1974 : .rc.rc_startblock = -1U,
1975 : };
1976 52422 : xfs_fsblock_t fsb;
1977 52422 : int error;
1978 :
1979 : /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */
1980 52422 : BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);
1981 52422 : if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS)
1982 : return -EOPNOTSUPP;
1983 :
1984 52422 : INIT_LIST_HEAD(&debris);
1985 :
1986 : /*
1987 : * In this first part, we use an empty transaction to gather up
1988 : * all the leftover CoW extents so that we can subsequently
1989 : * delete them. The empty transaction is used to avoid
1990 : * a buffer lock deadlock if there happens to be a loop in the
1991 : * refcountbt because we're allowed to re-grab a buffer that is
1992 : * already attached to our transaction. When we're done
1993 : * recording the CoW debris we cancel the (empty) transaction
1994 : * and everything goes away cleanly.
1995 : */
1996 52422 : error = xfs_trans_alloc_empty(mp, &tp);
1997 52422 : if (error)
1998 : return error;
1999 :
2000 52422 : error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
2001 52422 : if (error)
2002 10 : goto out_trans;
2003 52412 : cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
2004 :
2005 : /* Find all the leftover CoW staging extents. */
2006 52412 : error = xfs_btree_query_range(cur, &low, &high,
2007 : xfs_refcount_recover_extent, &debris);
2008 52412 : xfs_btree_del_cursor(cur, error);
2009 52412 : xfs_trans_brelse(tp, agbp);
2010 52412 : xfs_trans_cancel(tp);
2011 52412 : if (error)
2012 12 : goto out_free;
2013 :
2014 : /* Now iterate the list to free the leftovers */
2015 367592 : list_for_each_entry_safe(rr, n, &debris, rr_list) {
2016 : /* Set up transaction. */
2017 315192 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
2018 315192 : if (error)
2019 0 : goto out_free;
2020 :
2021 315192 : trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
2022 : &rr->rr_rrec);
2023 :
2024 : /* Free the orphan record */
2025 315192 : fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
2026 : rr->rr_rrec.rc_startblock);
2027 315192 : xfs_refcount_free_cow_extent(tp, fsb,
2028 : rr->rr_rrec.rc_blockcount);
2029 :
2030 : /* Free the block. */
2031 315192 : error = xfs_free_extent_later(tp, fsb,
2032 315192 : rr->rr_rrec.rc_blockcount, NULL,
2033 : XFS_AG_RESV_NONE);
2034 315192 : if (error)
2035 0 : goto out_trans;
2036 :
2037 315192 : error = xfs_trans_commit(tp);
2038 315192 : if (error)
2039 0 : goto out_free;
2040 :
2041 315192 : list_del(&rr->rr_list);
2042 315192 : kfree(rr);
2043 : }
2044 :
2045 : return error;
2046 10 : out_trans:
2047 10 : xfs_trans_cancel(tp);
2048 22 : out_free:
2049 : /* Free the leftover list */
2050 22 : list_for_each_entry_safe(rr, n, &debris, rr_list) {
2051 0 : list_del(&rr->rr_list);
2052 0 : kfree(rr);
2053 : }
2054 : return error;
2055 : }
2056 :
2057 : /*
2058 : * Scan part of the keyspace of the refcount records and tell us if the area
2059 : * has no records, is fully mapped by records, or is partially filled.
2060 : */
2061 : int
2062 1771950116 : xfs_refcount_has_records(
2063 : struct xfs_btree_cur *cur,
2064 : enum xfs_refc_domain domain,
2065 : xfs_agblock_t bno,
2066 : xfs_extlen_t len,
2067 : enum xbtree_recpacking *outcome)
2068 : {
2069 1771950116 : union xfs_btree_irec low;
2070 1771950116 : union xfs_btree_irec high;
2071 :
2072 1771950116 : memset(&low, 0, sizeof(low));
2073 1771950116 : low.rc.rc_startblock = bno;
2074 1771950116 : memset(&high, 0xFF, sizeof(high));
2075 1771950116 : high.rc.rc_startblock = bno + len - 1;
2076 1771950116 : low.rc.rc_domain = high.rc.rc_domain = domain;
2077 :
2078 1771950116 : return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
2079 : }
2080 :
/*
 * Context handed to xfs_refcount_query_range_helper() through the btree
 * range query's private pointer.
 */
struct xfs_refcount_query_range_info {
	/* Caller's callback, invoked with each decoded refcount record. */
	xfs_refcount_query_range_fn	fn;
	/* Caller's opaque pointer, passed to fn as its last argument. */
	void				*priv;
};
2085 :
2086 : /* Format btree record and pass to our callback. */
2087 : STATIC int
2088 493731 : xfs_refcount_query_range_helper(
2089 : struct xfs_btree_cur *cur,
2090 : const union xfs_btree_rec *rec,
2091 : void *priv)
2092 : {
2093 493731 : struct xfs_refcount_query_range_info *query = priv;
2094 493731 : struct xfs_refcount_irec irec;
2095 493731 : xfs_failaddr_t fa;
2096 :
2097 493731 : xfs_refcount_btrec_to_irec(rec, &irec);
2098 493731 : fa = xfs_refcount_check_irec(cur, &irec);
2099 493731 : if (fa)
2100 0 : return xfs_refcount_complain_bad_rec(cur, fa, &irec);
2101 :
2102 493731 : return query->fn(cur, &irec, query->priv);
2103 : }
2104 :
2105 : /* Find all refcount records between two keys. */
2106 : int
2107 61899 : xfs_refcount_query_range(
2108 : struct xfs_btree_cur *cur,
2109 : const struct xfs_refcount_irec *low_rec,
2110 : const struct xfs_refcount_irec *high_rec,
2111 : xfs_refcount_query_range_fn fn,
2112 : void *priv)
2113 : {
2114 61899 : union xfs_btree_irec low_brec = { .rc = *low_rec };
2115 61899 : union xfs_btree_irec high_brec = { .rc = *high_rec };
2116 61899 : struct xfs_refcount_query_range_info query = { .priv = priv, .fn = fn };
2117 :
2118 61899 : return xfs_btree_query_range(cur, &low_brec, &high_brec,
2119 : xfs_refcount_query_range_helper, &query);
2120 : }
2121 :
2122 : int __init
2123 50 : xfs_refcount_intent_init_cache(void)
2124 : {
2125 50 : xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
2126 : sizeof(struct xfs_refcount_intent),
2127 : 0, 0, NULL);
2128 :
2129 50 : return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
2130 : }
2131 :
2132 : void
2133 49 : xfs_refcount_intent_destroy_cache(void)
2134 : {
2135 49 : kmem_cache_destroy(xfs_refcount_intent_cache);
2136 49 : xfs_refcount_intent_cache = NULL;
2137 49 : }
|