Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0+
2 : /*
3 : * Copyright (C) 2016 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_defer.h"
14 : #include "xfs_btree.h"
15 : #include "xfs_bmap.h"
16 : #include "xfs_refcount_btree.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_errortag.h"
19 : #include "xfs_error.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_trans.h"
22 : #include "xfs_bit.h"
23 : #include "xfs_refcount.h"
24 : #include "xfs_rmap.h"
25 : #include "xfs_ag.h"
26 : #include "xfs_health.h"
27 :
/*
 * Cache handle with external linkage.  It is only defined here; no code in
 * this part of the file reads or assigns it.  Presumably it backs the
 * allocation of refcount intent items -- TODO confirm against the users.
 */
28 : struct kmem_cache *xfs_refcount_intent_cache;
29 :
30 : /*
 * Allowable refcount adjustment amounts.
 *
 * The numeric values matter: xfs_refc_merge_refcount() adds the enumerator
 * directly to a record's rc_refcount to predict the post-adjustment count.
 * Note that ADJUST_DECREASE and ADJUST_COW_FREE deliberately share the value
 * -1, so the two cannot be told apart by value alone.
 */
31 : enum xfs_refc_adjust_op {
32 : XFS_REFCOUNT_ADJUST_INCREASE = 1,
33 : XFS_REFCOUNT_ADJUST_DECREASE = -1,
34 : XFS_REFCOUNT_ADJUST_COW_ALLOC = 0,
35 : XFS_REFCOUNT_ADJUST_COW_FREE = -1,
36 : };
37 :
/*
 * Forward declarations for the CoW staging helpers.  Both take a refcount
 * btree cursor plus an AG block range (agbno, aglen).  Their definitions are
 * not part of this section of the file.
 */
38 : STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
39 : xfs_agblock_t agbno, xfs_extlen_t aglen);
40 : STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
41 : xfs_agblock_t agbno, xfs_extlen_t aglen);
42 :
43 : /*
44 : * Look up the first record less than or equal to (domain, bno) in the btree
45 : * given by cur.
 *
 * The search key is written into cur->bc_rec.rc as (domain, bno) with a
 * block count of zero; the caller supplies no length.  The return value and
 * *stat come straight from xfs_btree_lookup(); callers in this file treat
 * *stat == 0 as "no such record".
46 : */
47 : int
48 1339886065 : xfs_refcount_lookup_le(
49 : struct xfs_btree_cur *cur,
50 : enum xfs_refc_domain domain,
51 : xfs_agblock_t bno,
52 : int *stat)
53 : {
54 1347970993 : trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
55 : xfs_refcount_encode_startblock(bno, domain),
56 : XFS_LOOKUP_LE);
57 1339901200 : cur->bc_rec.rc.rc_startblock = bno;
58 1339901200 : cur->bc_rec.rc.rc_blockcount = 0;
59 1339901200 : cur->bc_rec.rc.rc_domain = domain;
60 1339901200 : return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
61 : }
62 :
63 : /*
64 : * Look up the first record greater than or equal to (domain, bno) in the
65 : * btree given by cur.
 *
 * The search key is written into cur->bc_rec.rc as (domain, bno) with a
 * block count of zero; the caller supplies no length.  The return value and
 * *stat come straight from xfs_btree_lookup(); callers in this file treat
 * *stat == 0 as "no such record".
66 : */
67 : int
68 249059610 : xfs_refcount_lookup_ge(
69 : struct xfs_btree_cur *cur,
70 : enum xfs_refc_domain domain,
71 : xfs_agblock_t bno,
72 : int *stat)
73 : {
74 256307759 : trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
75 : xfs_refcount_encode_startblock(bno, domain),
76 : XFS_LOOKUP_GE);
77 249059649 : cur->bc_rec.rc.rc_startblock = bno;
78 249059649 : cur->bc_rec.rc.rc_blockcount = 0;
79 249059649 : cur->bc_rec.rc.rc_domain = domain;
80 249059649 : return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
81 : }
82 :
83 : /*
84 : * Look up the first record equal to (domain, bno) in the btree
85 : * given by cur.
 *
 * The search key is written into cur->bc_rec.rc as (domain, bno) with a
 * block count of zero; the caller supplies no length.  The return value and
 * *stat come straight from xfs_btree_lookup().
 *
 * The tracepoint reports XFS_LOOKUP_EQ so that the trace output matches the
 * lookup direction that is actually performed.
86 : */
87 : int
88 0 : xfs_refcount_lookup_eq(
89 : struct xfs_btree_cur *cur,
90 : enum xfs_refc_domain domain,
91 : xfs_agblock_t bno,
92 : int *stat)
93 : {
94 0 : trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
95 : xfs_refcount_encode_startblock(bno, domain),
96 : XFS_LOOKUP_EQ);
97 0 : cur->bc_rec.rc.rc_startblock = bno;
98 0 : cur->bc_rec.rc.rc_blockcount = 0;
99 0 : cur->bc_rec.rc.rc_domain = domain;
100 0 : return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
101 : }
102 :
103 : /*
 * Convert on-disk record to in-core format.
 *
 * All three on-disk fields are big-endian 32-bit values.  The domain is not
 * stored separately on disk: if XFS_REFC_COWFLAG is set in the start block,
 * the flag is stripped and the record is placed in the COW domain; otherwise
 * it is placed in the SHARED domain.  No validation is done here.
 */
104 : void
105 2465106388 : xfs_refcount_btrec_to_irec(
106 : const union xfs_btree_rec *rec,
107 : struct xfs_refcount_irec *irec)
108 : {
109 2465106388 : uint32_t start;
110 :
111 2465106388 : start = be32_to_cpu(rec->refc.rc_startblock);
112 2465106388 : if (start & XFS_REFC_COWFLAG) {
113 46857303 : start &= ~XFS_REFC_COWFLAG;
114 46857303 : irec->rc_domain = XFS_REFC_DOMAIN_COW;
115 : } else {
116 2418249085 : irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
117 : }
118 :
119 2465106388 : irec->rc_startblock = start;
120 2465106388 : irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
121 2465106388 : irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
122 2465106388 : }
123 :
/*
 * Sanity-check an in-core refcount record against the given AG.
 *
 * Returns NULL if every check passes, otherwise the code address
 * (__this_address) of the check that failed.  The checks, in order:
 *  - block count is nonzero and no larger than MAXREFCEXTLEN;
 *  - xfs_refcount_check_domain() accepts the record;
 *  - xfs_verify_agbext() accepts (rc_startblock, rc_blockcount) for this AG;
 *  - reference count is nonzero and no larger than MAXREFCOUNT.
 */
124 : inline xfs_failaddr_t
125 2466632092 : xfs_refcount_check_perag_irec(
126 : struct xfs_perag *pag,
127 : const struct xfs_refcount_irec *irec)
128 : {
129 2466632092 : if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
130 0 : return __this_address;
131 :
132 2466632092 : if (!xfs_refcount_check_domain(irec))
133 0 : return __this_address;
134 :
135 : /* check for valid extent range, including overflow */
136 2466632092 : if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
137 4 : return __this_address;
138 :
139 2466632088 : if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
140 0 : return __this_address;
141 :
142 : return NULL;
143 : }
144 :
145 : /*
 * Simple checks for refcount records.
 *
 * Thin wrapper that validates irec against the AG attached to the cursor
 * (cur->bc_ag.pag).  Returns NULL when the record is acceptable, or the
 * failure address reported by xfs_refcount_check_perag_irec().
 */
146 : xfs_failaddr_t
147 13085573 : xfs_refcount_check_irec(
148 : struct xfs_btree_cur *cur,
149 : const struct xfs_refcount_irec *irec)
150 : {
151 13085573 : return xfs_refcount_check_perag_irec(cur->bc_ag.pag, irec);
152 : }
153 :
/*
 * Report a refcount record that failed validation.
 *
 * Logs two warnings (the AG number with the failing check address @fa, then
 * the record's start block, block count and reference count), marks the
 * btree behind @cur as sick, and always returns -EFSCORRUPTED so the caller
 * can return the result directly.
 */
154 : static inline int
155 4 : xfs_refcount_complain_bad_rec(
156 : struct xfs_btree_cur *cur,
157 : xfs_failaddr_t fa,
158 : const struct xfs_refcount_irec *irec)
159 : {
160 4 : struct xfs_mount *mp = cur->bc_mp;
161 :
162 4 : xfs_warn(mp,
163 : "Refcount BTree record corruption in AG %d detected at %pS!",
164 : cur->bc_ag.pag->pag_agno, fa);
165 4 : xfs_warn(mp,
166 : "Start block 0x%x, block count 0x%x, references 0x%x",
167 : irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
168 4 : xfs_btree_mark_sick(cur);
169 4 : return -EFSCORRUPTED;
170 : }
171 :
172 : /*
173 : * Get the data from the pointed-to record.
 *
 * Fetches the raw record at the cursor with xfs_btree_get_rec().  If that
 * call fails, or reports no record (*stat == 0), its return value is passed
 * back and irec is left untouched.  Otherwise the record is decoded into
 * irec and validated; a record that fails validation is logged, the btree is
 * marked sick, and -EFSCORRUPTED is returned.  Returns 0 on success.
174 : */
175 : int
176 2456747222 : xfs_refcount_get_rec(
177 : struct xfs_btree_cur *cur,
178 : struct xfs_refcount_irec *irec,
179 : int *stat)
180 : {
181 2456747222 : union xfs_btree_rec *rec;
182 2456747222 : xfs_failaddr_t fa;
183 2456747222 : int error;
184 :
185 2456747222 : error = xfs_btree_get_rec(cur, &rec, stat);
186 2456814419 : if (error || !*stat)
187 : return error;
188 :
189 2451749929 : xfs_refcount_btrec_to_irec(rec, irec);
190 2451784997 : fa = xfs_refcount_check_irec(cur, irec);
191 2451806473 : if (fa)
192 4 : return xfs_refcount_complain_bad_rec(cur, fa, irec);
193 :
194 2451806469 : trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
195 2451806469 : return 0;
196 : }
197 :
198 : /*
199 : * Update the record referred to by cur to the value given
200 : * by irec (start block, domain, block count, refcount).
 *
 * The in-core record is re-encoded to the on-disk form: the domain is folded
 * into the start block by xfs_refcount_encode_startblock() and all three
 * fields are stored big-endian.  The return value is whatever
 * xfs_btree_update() returns; a nonzero result is traced before being
 * passed back to the caller.
202 : */
203 : STATIC int
204 70870555 : xfs_refcount_update(
205 : struct xfs_btree_cur *cur,
206 : struct xfs_refcount_irec *irec)
207 : {
208 70870555 : union xfs_btree_rec rec;
209 70870555 : uint32_t start;
210 70870555 : int error;
211 :
212 70870555 : trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
213 :
214 70870558 : start = xfs_refcount_encode_startblock(irec->rc_startblock,
215 : irec->rc_domain);
216 70870558 : rec.refc.rc_startblock = cpu_to_be32(start);
217 70870558 : rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
218 70870558 : rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
219 :
220 70870558 : error = xfs_btree_update(cur, &rec);
221 70870558 : if (error)
222 0 : trace_xfs_refcount_update_error(cur->bc_mp,
223 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
224 70870558 : return error;
225 : }
226 :
227 : /*
228 : * Insert the record described by irec into the btree at the position
229 : * referred to by cur.
 *
 * The record is staged in cur->bc_rec.rc and handed to xfs_btree_insert().
 * An error from the insert is returned as-is.  If the insert succeeds but
 * reports *i != 1, the btree is marked sick and -EFSCORRUPTED is returned.
 * Any nonzero result is traced before being returned; 0 means success.
231 : */
232 : int
233 43295529 : xfs_refcount_insert(
234 : struct xfs_btree_cur *cur,
235 : struct xfs_refcount_irec *irec,
236 : int *i)
237 : {
238 43295529 : int error;
239 :
240 43295529 : trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
241 :
242 43295530 : cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
243 43295530 : cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
244 43295530 : cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
245 43295530 : cur->bc_rec.rc.rc_domain = irec->rc_domain;
246 :
247 43295530 : error = xfs_btree_insert(cur, i);
248 43295530 : if (error)
249 98 : goto out_error;
250 43295432 : if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
251 0 : xfs_btree_mark_sick(cur);
252 0 : error = -EFSCORRUPTED;
253 0 : goto out_error;
254 : }
255 :
256 43295530 : out_error:
257 43295530 : if (error)
258 98 : trace_xfs_refcount_insert_error(cur->bc_mp,
259 98 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
260 43295530 : return error;
261 : }
262 :
263 : /*
264 : * Remove the record referred to by cur, then set the pointer to the spot
265 : * where the record could be re-inserted, in case we want to increment or
266 : * decrement the cursor.
 *
 * The record is read first so that its (domain, start block) can be used to
 * reposition the cursor with a GE lookup after the delete.  A missing record
 * at the cursor, or a delete that reports *i != 1, marks the btree sick and
 * returns -EFSCORRUPTED.
 *
 * The return value of xfs_btree_delete() is checked before *i is examined:
 * a failed delete must be returned as-is rather than being misreported as
 * corruption based on whatever value *i happened to hold.
 *
 * Returns 0 on success; any nonzero result is traced before being returned.
268 : */
269 : STATIC int
270 40511153 : xfs_refcount_delete(
271 : struct xfs_btree_cur *cur,
272 : int *i)
273 : {
274 40511153 : struct xfs_refcount_irec irec;
275 40511153 : int found_rec;
276 40511153 : int error;
277 :
278 40511153 : error = xfs_refcount_get_rec(cur, &irec, &found_rec);
279 40511153 : if (error)
280 0 : goto out_error;
281 40511153 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
282 0 : xfs_btree_mark_sick(cur);
283 0 : error = -EFSCORRUPTED;
284 0 : goto out_error;
285 : }
286 40511153 : trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
287 40511153 : error = xfs_btree_delete(cur, i);
288 40511153 : if (error)
289 0 : goto out_error;
290 40511153 : if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
291 0 : xfs_btree_mark_sick(cur);
292 0 : error = -EFSCORRUPTED;
293 0 : goto out_error;
294 : }
295 40511153 : error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
296 : &found_rec);
297 40511151 : out_error:
298 40511151 : if (error)
299 0 : trace_xfs_refcount_delete_error(cur->bc_mp,
300 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
301 40511151 : return error;
302 : }
303 :
304 : /*
305 : * Adjusting the Reference Count
306 : *
307 : * As stated elsewhere, the reference count btree (refcbt) stores
308 : * >1 reference counts for extents of physical blocks. In this
309 : * operation, we're either raising or lowering the reference count of
310 : * some subrange stored in the tree:
311 : *
312 : * <------ adjustment range ------>
313 : * ----+ +---+-----+ +--+--------+---------
314 : * 2 | | 3 | 2 | |17| 55 | 10
315 : * ----+ +---+-----+ +--+--------+---------
316 : * The X axis is the physical block number;
317 : * reference counts are the numbers inside the rectangles.
318 : *
319 : * The first thing we need to do is to ensure that there are no
320 : * refcount extents crossing either boundary of the range to be
321 : * adjusted. For any extent that does cross a boundary, split it into
322 : * two extents so that we can increment the refcount of one of the
323 : * pieces later:
324 : *
325 : * <------ adjustment range ------>
326 : * ----+ +---+-----+ +--+--------+----+----
327 : * 2 | | 3 | 2 | |17| 55 | 10 | 10
328 : * ----+ +---+-----+ +--+--------+----+----
329 : *
330 : * For this next step, let's assume that all the physical blocks in
331 : * the adjustment range are mapped to a file and are therefore in use
332 : * at least once. Therefore, we can infer that any gap in the
333 : * refcount tree within the adjustment range represents a physical
334 : * extent with refcount == 1:
335 : *
336 : * <------ adjustment range ------>
337 : * ----+---+---+-----+-+--+--------+----+----
338 : * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10
339 : * ----+---+---+-----+-+--+--------+----+----
340 : * ^
341 : *
342 : * For each extent that falls within the interval range, figure out
343 : * which extent is to the left or the right of that extent. Now we
344 : * have a left, current, and right extent. If the new reference count
345 : * of the center extent enables us to merge left, center, and right
346 : * into one record covering all three, do so. If the center extent is
347 : * at the left end of the range, abuts the left extent, and its new
348 : * reference count matches the left extent's record, then merge them.
349 : * If the center extent is at the right end of the range, abuts the
350 : * right extent, and the reference counts match, merge those. In the
351 : * example, we can left merge (assuming an increment operation):
352 : *
353 : * <------ adjustment range ------>
354 : * --------+---+-----+-+--+--------+----+----
355 : * 2 | 3 | 2 |1|17| 55 | 10 | 10
356 : * --------+---+-----+-+--+--------+----+----
357 : * ^
358 : *
359 : * For all other extents within the range, adjust the reference count
360 : * or delete it if the refcount falls below 2. If we were
361 : * incrementing, the end result looks like this:
362 : *
363 : * <------ adjustment range ------>
364 : * --------+---+-----+-+--+--------+----+----
365 : * 2 | 4 | 3 |2|18| 56 | 11 | 10
366 : * --------+---+-----+-+--+--------+----+----
367 : *
368 : * The result of a decrement operation looks as such:
369 : *
370 : * <------ adjustment range ------>
371 : * ----+ +---+ +--+--------+----+----
372 : * 2 | | 2 | |16| 54 | 9 | 10
373 : * ----+ +---+ +--+--------+----+----
374 : * DDDD 111111DD
375 : *
376 : * The blocks marked "D" are freed; the blocks marked "1" are only
377 : * referenced once and therefore the record is removed from the
378 : * refcount btree.
379 : */
380 :
381 : /* Next block after this extent, i.e. rc_startblock + rc_blockcount. */
382 : static inline xfs_agblock_t
383 : xfs_refc_next(
384 : struct xfs_refcount_irec *rc)
385 : {
386 264704556 : return rc->rc_startblock + rc->rc_blockcount;
387 : }
388 :
389 : /*
390 : * Split a refcount extent that crosses agbno.
 *
 * Looks up the record at or before agbno in the given domain.  Nothing is
 * changed (and 0 is returned) if there is no such record, if the record
 * found belongs to a different domain, if it starts exactly at agbno, or if
 * it ends at or before agbno.
 *
 * Otherwise the existing record is rewritten in place to become the right
 * piece (starting at agbno) and a new record is inserted for the left piece
 * (from the original start up to agbno); both keep the original refcount.
 *
 * *shape_changed is cleared on entry and set to true once a split is going
 * to be attempted, before the btree is modified.  Errors are traced and
 * returned; an unexpected lookup/insert status yields -EFSCORRUPTED.
391 : */
392 : STATIC int
393 215748273 : xfs_refcount_split_extent(
394 : struct xfs_btree_cur *cur,
395 : enum xfs_refc_domain domain,
396 : xfs_agblock_t agbno,
397 : bool *shape_changed)
398 : {
399 215748273 : struct xfs_refcount_irec rcext, tmp;
400 215748273 : int found_rec;
401 215748273 : int error;
402 :
403 215748273 : *shape_changed = false;
404 215748273 : error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
405 215748391 : if (error)
406 59 : goto out_error;
407 215748332 : if (!found_rec)
408 : return 0;
409 :
410 211939804 : error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
411 211939765 : if (error)
412 0 : goto out_error;
413 211939765 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
414 0 : xfs_btree_mark_sick(cur);
415 0 : error = -EFSCORRUPTED;
416 0 : goto out_error;
417 : }
418 211939765 : if (rcext.rc_domain != domain)
419 : return 0;
420 211778013 : if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
421 : return 0;
422 :
423 19934844 : *shape_changed = true;
424 19934844 : trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
425 : &rcext, agbno);
426 :
427 : /* Establish the right extent. */
428 19934844 : tmp = rcext;
429 19934844 : tmp.rc_startblock = agbno;
430 19934844 : tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
431 19934844 : error = xfs_refcount_update(cur, &tmp);
432 19934844 : if (error)
433 0 : goto out_error;
434 :
435 : /* Insert the left extent. */
436 19934844 : tmp = rcext;
437 19934844 : tmp.rc_blockcount = agbno - rcext.rc_startblock;
438 19934844 : error = xfs_refcount_insert(cur, &tmp, &found_rec);
439 19934845 : if (error)
440 20 : goto out_error;
441 19934825 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
442 0 : xfs_btree_mark_sick(cur);
443 0 : error = -EFSCORRUPTED;
444 0 : goto out_error;
445 : }
446 : return error;
447 :
448 79 : out_error:
449 158 : trace_xfs_refcount_split_extent_error(cur->bc_mp,
450 79 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
451 79 : return error;
452 : }
453 :
454 : /*
455 : * Merge the left, center, and right extents.
 *
 * On success the btree holds a single record starting at left's start block
 * with a block count of extlen, and *aglen is set to 0.  left is modified
 * in place (its rc_blockcount becomes extlen); center and right are only
 * read.  All three records must be in the same domain (asserted).
 *
 * Errors are traced and returned; any lookup or delete that does not report
 * exactly one record marks the btree sick and returns -EFSCORRUPTED.
456 : */
457 : STATIC int
458 1799778 : xfs_refcount_merge_center_extents(
459 : struct xfs_btree_cur *cur,
460 : struct xfs_refcount_irec *left,
461 : struct xfs_refcount_irec *center,
462 : struct xfs_refcount_irec *right,
463 : unsigned long long extlen,
464 : xfs_extlen_t *aglen)
465 : {
466 1799778 : int error;
467 1799778 : int found_rec;
468 :
469 1799778 : trace_xfs_refcount_merge_center_extents(cur->bc_mp,
470 1799778 : cur->bc_ag.pag->pag_agno, left, center, right);
471 :
472 1799778 : ASSERT(left->rc_domain == center->rc_domain);
473 1799778 : ASSERT(right->rc_domain == center->rc_domain);
474 :
475 : /*
476 : * Make sure the center and right extents are not in the btree.
477 : * If the center extent was synthesized, the first delete call
478 : * removes the right extent and we skip the second deletion.
479 : * If center and right were in the btree, then the first delete
480 : * call removes the center and the second one removes the right
481 : * extent.
482 : */
483 1799778 : error = xfs_refcount_lookup_ge(cur, center->rc_domain,
484 : center->rc_startblock, &found_rec);
485 1799778 : if (error)
486 0 : goto out_error;
487 1799778 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
488 0 : xfs_btree_mark_sick(cur);
489 0 : error = -EFSCORRUPTED;
490 0 : goto out_error;
491 : }
492 :
493 1799778 : error = xfs_refcount_delete(cur, &found_rec);
494 1799778 : if (error)
495 0 : goto out_error;
496 1799778 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
497 0 : xfs_btree_mark_sick(cur);
498 0 : error = -EFSCORRUPTED;
499 0 : goto out_error;
500 : }
501 :
502 1799778 : if (center->rc_refcount > 1) { /* center was a real btree record */
503 486785 : error = xfs_refcount_delete(cur, &found_rec);
504 486785 : if (error)
505 0 : goto out_error;
506 486785 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
507 0 : xfs_btree_mark_sick(cur);
508 0 : error = -EFSCORRUPTED;
509 0 : goto out_error;
510 : }
511 : }
512 :
513 : /* Enlarge the left extent. */
514 1799778 : error = xfs_refcount_lookup_le(cur, left->rc_domain,
515 : left->rc_startblock, &found_rec);
516 1799778 : if (error)
517 0 : goto out_error;
518 1799778 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
519 0 : xfs_btree_mark_sick(cur);
520 0 : error = -EFSCORRUPTED;
521 0 : goto out_error;
522 : }
523 :
524 1799778 : left->rc_blockcount = extlen;
525 1799778 : error = xfs_refcount_update(cur, left);
526 1799778 : if (error)
527 0 : goto out_error;
528 :
529 1799778 : *aglen = 0;
530 1799778 : return error;
531 :
532 0 : out_error:
533 0 : trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
534 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
535 0 : return error;
536 : }
537 :
538 : /*
539 : * Merge with the left extent.
 *
 * Absorbs cleft into left: cleft is deleted from the btree if it was a real
 * record (rc_refcount > 1), then left is grown by cleft's block count and
 * written back.  left is modified in place.  On success the caller's range
 * is shrunk from the front: *agbno advances and *aglen decreases by cleft's
 * block count.  left and cleft must be in the same domain (asserted).
 *
 * Errors are traced and returned; any lookup or delete that does not report
 * exactly one record marks the btree sick and returns -EFSCORRUPTED.
540 : */
541 : STATIC int
542 5569142 : xfs_refcount_merge_left_extent(
543 : struct xfs_btree_cur *cur,
544 : struct xfs_refcount_irec *left,
545 : struct xfs_refcount_irec *cleft,
546 : xfs_agblock_t *agbno,
547 : xfs_extlen_t *aglen)
548 : {
549 5569142 : int error;
550 5569142 : int found_rec;
551 :
552 5569142 : trace_xfs_refcount_merge_left_extent(cur->bc_mp,
553 5569142 : cur->bc_ag.pag->pag_agno, left, cleft);
554 :
555 5569137 : ASSERT(left->rc_domain == cleft->rc_domain);
556 :
557 : /* If the extent at agbno (cleft) wasn't synthesized, remove it. */
558 5569137 : if (cleft->rc_refcount > 1) {
559 1889025 : error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
560 : cleft->rc_startblock, &found_rec);
561 1889025 : if (error)
562 0 : goto out_error;
563 1889025 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
564 0 : xfs_btree_mark_sick(cur);
565 0 : error = -EFSCORRUPTED;
566 0 : goto out_error;
567 : }
568 :
569 1889025 : error = xfs_refcount_delete(cur, &found_rec);
570 1889025 : if (error)
571 0 : goto out_error;
572 1889025 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
573 0 : xfs_btree_mark_sick(cur);
574 0 : error = -EFSCORRUPTED;
575 0 : goto out_error;
576 : }
577 : }
578 :
579 : /* Enlarge the left extent. */
580 5569137 : error = xfs_refcount_lookup_le(cur, left->rc_domain,
581 : left->rc_startblock, &found_rec);
582 5569141 : if (error)
583 0 : goto out_error;
584 5569141 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
585 0 : xfs_btree_mark_sick(cur);
586 0 : error = -EFSCORRUPTED;
587 0 : goto out_error;
588 : }
589 :
590 5569141 : left->rc_blockcount += cleft->rc_blockcount;
591 5569141 : error = xfs_refcount_update(cur, left);
592 5569141 : if (error)
593 0 : goto out_error;
594 :
595 5569141 : *agbno += cleft->rc_blockcount;
596 5569141 : *aglen -= cleft->rc_blockcount;
597 5569141 : return error;
598 :
599 0 : out_error:
600 0 : trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
601 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
602 0 : return error;
603 : }
604 :
605 : /*
606 : * Merge with the right extent.
 *
 * Absorbs cright into right: cright is deleted from the btree if it was a
 * real record (rc_refcount > 1), then right is extended backwards (its start
 * block moves down and its block count grows by cright's block count) and
 * written back.  right is modified in place.  On success *aglen decreases by
 * cright's block count; the start of the caller's range is unchanged.
 * right and cright must be in the same domain (asserted).
 *
 * Errors are traced and returned; any lookup or delete that does not report
 * exactly one record marks the btree sick and returns -EFSCORRUPTED.
607 : */
608 : STATIC int
609 3852452 : xfs_refcount_merge_right_extent(
610 : struct xfs_btree_cur *cur,
611 : struct xfs_refcount_irec *right,
612 : struct xfs_refcount_irec *cright,
613 : xfs_extlen_t *aglen)
614 : {
615 3852452 : int error;
616 3852452 : int found_rec;
617 :
618 3852452 : trace_xfs_refcount_merge_right_extent(cur->bc_mp,
619 3852452 : cur->bc_ag.pag->pag_agno, cright, right);
620 :
621 3852452 : ASSERT(right->rc_domain == cright->rc_domain);
622 :
623 : /*
624 : * If the extent ending at agbno+aglen (cright) wasn't synthesized,
625 : * remove it.
626 : */
627 3852452 : if (cright->rc_refcount > 1) {
628 1801887 : error = xfs_refcount_lookup_le(cur, cright->rc_domain,
629 : cright->rc_startblock, &found_rec);
630 1801887 : if (error)
631 0 : goto out_error;
632 1801887 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
633 0 : xfs_btree_mark_sick(cur);
634 0 : error = -EFSCORRUPTED;
635 0 : goto out_error;
636 : }
637 :
638 1801887 : error = xfs_refcount_delete(cur, &found_rec);
639 1801887 : if (error)
640 0 : goto out_error;
641 1801887 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
642 0 : xfs_btree_mark_sick(cur);
643 0 : error = -EFSCORRUPTED;
644 0 : goto out_error;
645 : }
646 : }
647 :
648 : /* Enlarge the right extent. */
649 3852452 : error = xfs_refcount_lookup_le(cur, right->rc_domain,
650 : right->rc_startblock, &found_rec);
651 3852452 : if (error)
652 0 : goto out_error;
653 3852452 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
654 0 : xfs_btree_mark_sick(cur);
655 0 : error = -EFSCORRUPTED;
656 0 : goto out_error;
657 : }
658 :
659 3852452 : right->rc_startblock -= cright->rc_blockcount;
660 3852452 : right->rc_blockcount += cright->rc_blockcount;
661 3852452 : error = xfs_refcount_update(cur, right);
662 3852452 : if (error)
663 0 : goto out_error;
664 :
665 3852452 : *aglen -= cright->rc_blockcount;
666 3852452 : return error;
667 :
668 0 : out_error:
669 0 : trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
670 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
671 0 : return error;
672 : }
673 :
674 : /*
675 : * Find the left extent and the one after it (cleft). This function assumes
676 : * that we've already split any extent crossing agbno.
 *
 * Both outputs start out with rc_startblock == NULLAGBLOCK, which is what
 * xfs_refc_valid() treats as "not valid".  They stay that way, and 0 is
 * returned, when there is no record at or before agbno - 1, when that
 * record is in a different domain, or when it does not end exactly at agbno.
 *
 * Otherwise *left is that record and *cleft is filled in as follows:
 *  - the next record in the same domain, if it starts at agbno;
 *  - a synthesized refcount == 1 extent starting at agbno and running up to
 *    the next record or aglen blocks, whichever is shorter, if the next
 *    record in the same domain starts later;
 *  - a synthesized refcount == 1 extent covering all of [agbno, aglen) if
 *    there is no next record or it is in a different domain.
 *
 * Errors are traced and returned; an unexpected get_rec status marks the
 * btree sick and returns -EFSCORRUPTED.
677 : */
678 : STATIC int
679 107874146 : xfs_refcount_find_left_extents(
680 : struct xfs_btree_cur *cur,
681 : struct xfs_refcount_irec *left,
682 : struct xfs_refcount_irec *cleft,
683 : enum xfs_refc_domain domain,
684 : xfs_agblock_t agbno,
685 : xfs_extlen_t aglen)
686 : {
687 107874146 : struct xfs_refcount_irec tmp;
688 107874146 : int error;
689 107874146 : int found_rec;
690 :
691 107874146 : left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
692 107874146 : error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
693 107874118 : if (error)
694 0 : goto out_error;
695 107874118 : if (!found_rec)
696 : return 0;
697 :
698 97790094 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
699 97790081 : if (error)
700 0 : goto out_error;
701 97790081 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
702 0 : xfs_btree_mark_sick(cur);
703 0 : error = -EFSCORRUPTED;
704 0 : goto out_error;
705 : }
706 :
707 97790081 : if (tmp.rc_domain != domain)
708 : return 0;
709 97231530 : if (xfs_refc_next(&tmp) != agbno)
710 : return 0;
711 : /* We have a left extent; retrieve (or invent) the next right one */
712 18658443 : *left = tmp;
713 :
714 18658443 : error = xfs_btree_increment(cur, 0, &found_rec);
715 18658441 : if (error)
716 0 : goto out_error;
717 18658441 : if (found_rec) {
718 18516747 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
719 18516747 : if (error)
720 0 : goto out_error;
721 18516747 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
722 0 : xfs_btree_mark_sick(cur);
723 0 : error = -EFSCORRUPTED;
724 0 : goto out_error;
725 : }
726 :
727 18516747 : if (tmp.rc_domain != domain)
728 13843 : goto not_found;
729 :
730 : /* if tmp starts at the end of our range, just use that */
731 18502904 : if (tmp.rc_startblock == agbno)
732 13504369 : *cleft = tmp;
733 : else {
734 : /*
735 : * There's a gap in the refcntbt at the start of the
736 : * range we're interested in (refcount == 1) so
737 : * synthesize the implied extent and pass it back.
738 : * We assume here that the agbno/aglen range was
739 : * passed in from a data fork extent mapping and
740 : * therefore is allocated to exactly one owner.
741 : */
742 4998535 : cleft->rc_startblock = agbno;
743 4998535 : cleft->rc_blockcount = min(aglen,
744 : tmp.rc_startblock - agbno);
745 4998535 : cleft->rc_refcount = 1;
746 4998535 : cleft->rc_domain = domain;
747 : }
748 : } else {
749 141694 : not_found:
750 : /*
751 : * No extents, so pretend that there's one covering the whole
752 : * range.
753 : */
754 155537 : cleft->rc_startblock = agbno;
755 155537 : cleft->rc_blockcount = aglen;
756 155537 : cleft->rc_refcount = 1;
757 155537 : cleft->rc_domain = domain;
758 : }
759 18658441 : trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
760 : left, cleft, agbno);
761 18658441 : return error;
762 :
763 0 : out_error:
764 0 : trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
765 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
766 0 : return error;
767 : }
768 :
769 : /*
770 : * Find the right extent and the one before it (cright). This function
771 : * assumes that we've already split any extents crossing agbno + aglen.
 *
 * Both outputs start out with rc_startblock == NULLAGBLOCK, which is what
 * xfs_refc_valid() treats as "not valid".  They stay that way, and 0 is
 * returned, when there is no record at or after agbno + aglen, when that
 * record is in a different domain, or when it does not start exactly at
 * agbno + aglen.
 *
 * Otherwise *right is that record and *cright is filled in as follows:
 *  - the previous record in the same domain, if it ends at agbno + aglen;
 *  - a synthesized refcount == 1 extent ending where right starts and
 *    beginning at the later of agbno and the end of the previous record, if
 *    the previous record in the same domain ends earlier;
 *  - a synthesized refcount == 1 extent covering all of [agbno, aglen) if
 *    there is no previous record or it is in a different domain.
 *
 * Errors are traced and returned; an unexpected get_rec status marks the
 * btree sick and returns -EFSCORRUPTED.
772 : */
773 : STATIC int
774 107874134 : xfs_refcount_find_right_extents(
775 : struct xfs_btree_cur *cur,
776 : struct xfs_refcount_irec *right,
777 : struct xfs_refcount_irec *cright,
778 : enum xfs_refc_domain domain,
779 : xfs_agblock_t agbno,
780 : xfs_extlen_t aglen)
781 : {
782 107874134 : struct xfs_refcount_irec tmp;
783 107874134 : int error;
784 107874134 : int found_rec;
785 :
786 107874134 : right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
787 107874134 : error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
788 107874165 : if (error)
789 0 : goto out_error;
790 107874165 : if (!found_rec)
791 : return 0;
792 :
793 94625445 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
794 94625450 : if (error)
795 4 : goto out_error;
796 94625446 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
797 0 : xfs_btree_mark_sick(cur);
798 0 : error = -EFSCORRUPTED;
799 0 : goto out_error;
800 : }
801 :
802 94625446 : if (tmp.rc_domain != domain)
803 : return 0;
804 93341849 : if (tmp.rc_startblock != agbno + aglen)
805 : return 0;
806 : /* We have a right extent; retrieve (or invent) the next left one */
807 16591871 : *right = tmp;
808 :
809 16591871 : error = xfs_btree_decrement(cur, 0, &found_rec);
810 16591871 : if (error)
811 0 : goto out_error;
812 16591871 : if (found_rec) {
813 16578270 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
814 16578269 : if (error)
815 0 : goto out_error;
816 16578269 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
817 0 : xfs_btree_mark_sick(cur);
818 0 : error = -EFSCORRUPTED;
819 0 : goto out_error;
820 : }
821 :
822 16578269 : if (tmp.rc_domain != domain)
823 32 : goto not_found;
824 :
825 : /* if tmp ends at the end of our range, just use that */
826 16578237 : if (xfs_refc_next(&tmp) == agbno + aglen)
827 13097585 : *cright = tmp;
828 : else {
829 : /*
830 : * There's a gap in the refcntbt at the end of the
831 : * range we're interested in (refcount == 1) so
832 : * create the implied extent and pass it back.
833 : * We assume here that the agbno/aglen range was
834 : * passed in from a data fork extent mapping and
835 : * therefore is allocated to exactly one owner.
836 : */
837 3480652 : cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
838 3480652 : cright->rc_blockcount = right->rc_startblock -
839 : cright->rc_startblock;
840 3480652 : cright->rc_refcount = 1;
841 3480652 : cright->rc_domain = domain;
842 : }
843 : } else {
844 13601 : not_found:
845 : /*
846 : * No extents, so pretend that there's one covering the whole
847 : * range.
848 : */
849 13633 : cright->rc_startblock = agbno;
850 13633 : cright->rc_blockcount = aglen;
851 13633 : cright->rc_refcount = 1;
852 13633 : cright->rc_domain = domain;
853 : }
854 16591870 : trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
855 : cright, right, agbno + aglen);
856 16591870 : return error;
857 :
858 4 : out_error:
859 8 : trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
860 4 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
861 4 : return error;
862 : }
863 :
864 : /*
 * Is this extent valid?
 *
 * A record counts as valid when its start block is not NULLAGBLOCK, the
 * marker that the find_left/find_right helpers store in records they could
 * not fill in.  No other field is examined.
 */
865 : static inline bool
866 : xfs_refc_valid(
867 : const struct xfs_refcount_irec *rc)
868 : {
869 340463641 : return rc->rc_startblock != NULLAGBLOCK;
870 : }
871 :
/*
 * Compute the refcount that irec would have after applying adjust.
 *
 * The adjustment is added arithmetically to rc_refcount, except that a
 * record already at MAXREFCOUNT stays at MAXREFCOUNT.  irec itself is not
 * modified.
 */
872 : static inline xfs_nlink_t
873 : xfs_refc_merge_refcount(
874 : const struct xfs_refcount_irec *irec,
875 : enum xfs_refc_adjust_op adjust)
876 : {
877 : /* Once a record hits MAXREFCOUNT, it is pinned there forever */
878 39059257 : if (irec->rc_refcount == MAXREFCOUNT)
879 : return MAXREFCOUNT;
880 39059247 : return irec->rc_refcount + adjust;
881 : }
882 :
/*
 * Decide whether left, the single center record, and right can be collapsed
 * into one record once adjust has been applied to the center.
 *
 * Returns true, and stores the combined block count in *ulenp, only when
 * all four records are valid, cleft_is_cright is set, both shoulder
 * refcounts equal the center's post-adjustment refcount, and the combined
 * length is strictly less than MAXREFCEXTLEN.  *ulenp is left untouched
 * when false is returned.  None of the records are modified.
 */
883 : static inline bool
884 27265438 : xfs_refc_want_merge_center(
885 : const struct xfs_refcount_irec *left,
886 : const struct xfs_refcount_irec *cleft,
887 : const struct xfs_refcount_irec *cright,
888 : const struct xfs_refcount_irec *right,
889 : bool cleft_is_cright,
890 : enum xfs_refc_adjust_op adjust,
891 : unsigned long long *ulenp)
892 : {
893 27265438 : unsigned long long ulen = left->rc_blockcount;
894 27265438 : xfs_nlink_t new_refcount;
895 :
896 : /*
897 : * To merge with a center record, both shoulder records must be
898 : * adjacent to the record we want to adjust. This is only true if
899 : * find_left and find_right made all four records valid.
900 : */
901 27265438 : if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
902 7984872 : !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
903 : return false;
904 :
905 : /* There must only be one record for the entire range. */
906 7984872 : if (!cleft_is_cright)
907 : return false;
908 :
909 : /* The shoulder record refcounts must match the new refcount. */
910 7959469 : new_refcount = xfs_refc_merge_refcount(cleft, adjust);
911 7959469 : if (left->rc_refcount != new_refcount)
912 : return false;
913 2350744 : if (right->rc_refcount != new_refcount)
914 : return false;
915 :
916 : /*
917 : * The new record cannot exceed the max length. ulen is a ULL as the
918 : * individual record block counts can be up to (u32 - 1) in length
919 : * hence we need to catch u32 addition overflows here.
920 : */
921 1799778 : ulen += cleft->rc_blockcount + right->rc_blockcount;
922 1799778 : if (ulen >= MAXREFCEXTLEN) /* a length equal to the max is refused too */
923 : return false;
924 :
925 1799778 : *ulenp = ulen;
926 1799778 : return true;
927 : }
928 :
/*
 * Decide whether cleft can be folded into left once adjust has been applied
 * to cleft.
 *
 * Returns true only when both records are valid, left's refcount equals
 * cleft's post-adjustment refcount, and the combined block count is
 * strictly less than MAXREFCEXTLEN.  Neither record is modified.
 */
929 : static inline bool
930 25465660 : xfs_refc_want_merge_left(
931 : const struct xfs_refcount_irec *left,
932 : const struct xfs_refcount_irec *cleft,
933 : enum xfs_refc_adjust_op adjust)
934 : {
935 25465660 : unsigned long long ulen = left->rc_blockcount;
936 25465660 : xfs_nlink_t new_refcount;
937 :
938 : /*
939 : * For a left merge, the left shoulder record must be adjacent to the
940 : * start of the range. If this is true, find_left made left and cleft
941 : * contain valid contents.
942 : */
943 25465660 : if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
944 : return false;
945 :
946 : /* Left shoulder record refcount must match the new refcount. */
947 16858661 : new_refcount = xfs_refc_merge_refcount(cleft, adjust);
948 16858661 : if (left->rc_refcount != new_refcount)
949 : return false;
950 :
951 : /*
952 : * The new record cannot exceed the max length. ulen is a ULL as the
953 : * individual record block counts can be up to (u32 - 1) in length
954 : * hence we need to catch u32 addition overflows here.
955 : */
956 5569140 : ulen += cleft->rc_blockcount;
957 5569140 : if (ulen >= MAXREFCEXTLEN) /* a length equal to the max is refused too */
958 0 : return false;
959 :
960 : return true;
961 : }
962 :
963 : static inline bool
964 24914695 : xfs_refc_want_merge_right(
965 : const struct xfs_refcount_irec *cright,
966 : const struct xfs_refcount_irec *right,
967 : enum xfs_refc_adjust_op adjust)
968 : {
969 24914695 : unsigned long long ulen = right->rc_blockcount;
970 24914695 : xfs_nlink_t new_refcount;
971 :
972 : /*
973 : * For a right merge, the right shoulder record must be adjacent to the
974 : * end of the range. If this is true, find_right made cright and right
975 : * contain valid contents.
976 : */
977 24914695 : if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
978 : return false;
979 :
980 : /* Right shoulder record refcount must match the new refcount. */
981 14241127 : new_refcount = xfs_refc_merge_refcount(cright, adjust);
982 14241127 : if (right->rc_refcount != new_refcount)
983 : return false;
984 :
985 : /*
986 : * The new record cannot exceed the max length. ulen is a ULL as the
987 : * individual record block counts can be up to (u32 - 1) in length
988 : * hence we need to catch u32 addition overflows here.
989 : */
990 3852452 : ulen += cright->rc_blockcount;
991 3852452 : if (ulen >= MAXREFCEXTLEN)
992 0 : return false;
993 :
994 : return true;
995 : }
996 :
997 : /*
998 : * Try to merge with any extents on the boundaries of the adjustment range.
999 : */
1000 : STATIC int
1001 107874112 : xfs_refcount_merge_extents(
1002 : struct xfs_btree_cur *cur,
1003 : enum xfs_refc_domain domain,
1004 : xfs_agblock_t *agbno,
1005 : xfs_extlen_t *aglen,
1006 : enum xfs_refc_adjust_op adjust,
1007 : bool *shape_changed)
1008 : {
1009 107874112 : struct xfs_refcount_irec left = {0}, cleft = {0};
1010 107874112 : struct xfs_refcount_irec cright = {0}, right = {0};
1011 107874112 : int error;
1012 107874112 : unsigned long long ulen;
1013 107874112 : bool cequal;
1014 :
1015 107874112 : *shape_changed = false;
1016 : /*
1017 : * Find the extent just below agbno [left], just above agbno [cleft],
1018 : * just below (agbno + aglen) [cright], and just above (agbno + aglen)
1019 : * [right].
1020 : */
1021 107874112 : error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
1022 : *agbno, *aglen);
1023 107874159 : if (error)
1024 : return error;
1025 107874157 : error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
1026 : *agbno, *aglen);
1027 107874163 : if (error)
1028 : return error;
1029 :
1030 : /* No left or right extent to merge; exit. */
1031 107874159 : if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
1032 : return 0;
1033 :
1034 27265437 : cequal = (cleft.rc_startblock == cright.rc_startblock) &&
1035 7959468 : (cleft.rc_blockcount == cright.rc_blockcount);
1036 :
1037 : /* Try to merge left, cleft, and right. cleft must == cright. */
1038 27265437 : if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
1039 : adjust, &ulen)) {
1040 1799778 : *shape_changed = true;
1041 1799778 : return xfs_refcount_merge_center_extents(cur, &left, &cleft,
1042 : &right, ulen, aglen);
1043 : }
1044 :
1045 : /* Try to merge left and cleft. */
1046 25465656 : if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
1047 5569134 : *shape_changed = true;
1048 5569134 : error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
1049 : agbno, aglen);
1050 5569141 : if (error)
1051 : return error;
1052 :
1053 : /*
1054 : * If we just merged left + cleft and cleft == cright,
1055 : * we no longer have a cright to merge with right. We're done.
1056 : */
1057 5569141 : if (cequal)
1058 : return 0;
1059 : }
1060 :
1061 : /* Try to merge cright and right. */
1062 24914697 : if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
1063 3852452 : *shape_changed = true;
1064 3852452 : return xfs_refcount_merge_right_extent(cur, &right, &cright,
1065 : aglen);
1066 : }
1067 :
1068 : return 0;
1069 : }
1070 :
1071 : /*
1072 : * XXX: This is a pretty hand-wavy estimate. The penalty for guessing
1073 : * true incorrectly is a shutdown FS; the penalty for guessing false
1074 : * incorrectly is more transaction rolls than might be necessary.
1075 : * Be conservative here.
1076 : */
1077 : static bool
1078 110579190 : xfs_refcount_still_have_space(
1079 : struct xfs_btree_cur *cur)
1080 : {
1081 110579190 : unsigned long overhead;
1082 :
1083 : /*
1084 : * Worst case estimate: full splits of the free space and rmap btrees
1085 : * to handle each of the shape changes to the refcount btree.
1086 : */
1087 110579190 : overhead = xfs_allocfree_block_count(cur->bc_mp,
1088 : cur->bc_ag.refc.shape_changes);
1089 110579219 : overhead += cur->bc_mp->m_refc_maxlevels;
1090 110579219 : overhead *= cur->bc_mp->m_sb.sb_blocksize;
1091 :
1092 : /*
1093 : * Only allow 2 refcount extent updates per transaction if the
1094 : * refcount continue update "error" has been injected.
1095 : */
1096 125789759 : if (cur->bc_ag.refc.nr_ops > 2 &&
1097 15210540 : XFS_TEST_ERROR(false, cur->bc_mp,
1098 : XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
1099 : return false;
1100 :
1101 110579135 : if (cur->bc_ag.refc.nr_ops == 0)
1102 : return true;
1103 16483936 : else if (overhead > cur->bc_tp->t_log_res)
1104 : return false;
1105 16483936 : return cur->bc_tp->t_log_res - overhead >
1106 16483936 : cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
1107 : }
1108 :
1109 : /*
1110 : * Adjust the refcounts of middle extents. At this point we should have
1111 : * split extents that crossed the adjustment range; merged with adjacent
1112 : * extents; and updated agbno/aglen to reflect the merges. Therefore,
1113 : * all we have to do is update the extents inside [agbno, agbno + aglen].
1114 : */
1115 : STATIC int
1116 105304862 : xfs_refcount_adjust_extents(
1117 : struct xfs_btree_cur *cur,
1118 : xfs_agblock_t *agbno,
1119 : xfs_extlen_t *aglen,
1120 : enum xfs_refc_adjust_op adj)
1121 : {
1122 105304862 : struct xfs_refcount_irec ext, tmp;
1123 105304862 : int error;
1124 105304862 : int found_rec, found_tmp;
1125 105304862 : xfs_fsblock_t fsbno;
1126 :
1127 : /* Merging did all the work already. */
1128 105304862 : if (*aglen == 0)
1129 : return 0;
1130 :
1131 94221571 : error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
1132 : &found_rec);
1133 94221584 : if (error)
1134 0 : goto out_error;
1135 :
1136 166238792 : while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
1137 108366667 : error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1138 108366691 : if (error)
1139 0 : goto out_error;
1140 108366691 : if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
1141 6099206 : ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
1142 6099206 : ext.rc_blockcount = 0;
1143 6099206 : ext.rc_refcount = 0;
1144 6099206 : ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
1145 : }
1146 :
1147 : /*
1148 : * Deal with a hole in the refcount tree; if a file maps to
1149 : * these blocks and there's no refcountbt record, pretend that
1150 : * there is one with refcount == 1.
1151 : */
1152 108366691 : if (ext.rc_startblock != *agbno) {
1153 38561632 : tmp.rc_startblock = *agbno;
1154 38561632 : tmp.rc_blockcount = min(*aglen,
1155 : ext.rc_startblock - *agbno);
1156 38561632 : tmp.rc_refcount = 1 + adj;
1157 38561632 : tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;
1158 :
1159 38561632 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1160 38561632 : cur->bc_ag.pag->pag_agno, &tmp);
1161 :
1162 : /*
1163 : * Either cover the hole (increment) or
1164 : * delete the range (decrement).
1165 : */
1166 38561633 : cur->bc_ag.refc.nr_ops++;
1167 38561633 : if (tmp.rc_refcount) {
1168 23150566 : error = xfs_refcount_insert(cur, &tmp,
1169 : &found_tmp);
1170 23150566 : if (error)
1171 78 : goto out_error;
1172 23150488 : if (XFS_IS_CORRUPT(cur->bc_mp,
1173 : found_tmp != 1)) {
1174 0 : xfs_btree_mark_sick(cur);
1175 0 : error = -EFSCORRUPTED;
1176 0 : goto out_error;
1177 : }
1178 : } else {
1179 15411067 : fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
1180 : cur->bc_ag.pag->pag_agno,
1181 : tmp.rc_startblock);
1182 15411067 : error = xfs_free_extent_later(cur->bc_tp, fsbno,
1183 15411067 : tmp.rc_blockcount, NULL,
1184 : XFS_AG_RESV_NONE);
1185 15411046 : if (error)
1186 0 : goto out_error;
1187 : }
1188 :
1189 38561534 : (*agbno) += tmp.rc_blockcount;
1190 38561534 : (*aglen) -= tmp.rc_blockcount;
1191 :
1192 : /* Stop if there's nothing left to modify */
1193 38561534 : if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
1194 : break;
1195 :
1196 : /* Move the cursor to the start of ext. */
1197 2212146 : error = xfs_refcount_lookup_ge(cur,
1198 : XFS_REFC_DOMAIN_SHARED, *agbno,
1199 : &found_rec);
1200 2212146 : if (error)
1201 0 : goto out_error;
1202 : }
1203 :
1204 : /*
1205 : * A previous step trimmed agbno/aglen such that the end of the
1206 : * range would not be in the middle of the record. If this is
1207 : * no longer the case, something is seriously wrong with the
1208 : * btree. Make sure we never feed the synthesized record into
1209 : * the processing loop below.
1210 : */
1211 72017205 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
1212 72017205 : XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
1213 0 : xfs_btree_mark_sick(cur);
1214 0 : error = -EFSCORRUPTED;
1215 0 : goto out_error;
1216 : }
1217 :
1218 : /*
1219 : * Adjust the reference count and either update the tree
1220 : * (incr) or free the blocks (decr).
1221 : */
1222 72017205 : if (ext.rc_refcount == MAXREFCOUNT)
1223 16 : goto skip;
1224 72017189 : ext.rc_refcount += adj;
1225 72017189 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1226 72017189 : cur->bc_ag.pag->pag_agno, &ext);
1227 72017192 : cur->bc_ag.refc.nr_ops++;
1228 72017192 : if (ext.rc_refcount > 1) {
1229 39714344 : error = xfs_refcount_update(cur, &ext);
1230 39714344 : if (error)
1231 0 : goto out_error;
1232 32302848 : } else if (ext.rc_refcount == 1) {
1233 32302848 : error = xfs_refcount_delete(cur, &found_rec);
1234 32302848 : if (error)
1235 0 : goto out_error;
1236 32302848 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1237 0 : xfs_btree_mark_sick(cur);
1238 0 : error = -EFSCORRUPTED;
1239 0 : goto out_error;
1240 : }
1241 32302848 : goto advloop;
1242 : } else {
1243 0 : fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
1244 : cur->bc_ag.pag->pag_agno,
1245 : ext.rc_startblock);
1246 0 : error = xfs_free_extent_later(cur->bc_tp, fsbno,
1247 0 : ext.rc_blockcount, NULL,
1248 : XFS_AG_RESV_NONE);
1249 0 : if (error)
1250 0 : goto out_error;
1251 : }
1252 :
1253 0 : skip:
1254 39714360 : error = xfs_btree_increment(cur, 0, &found_rec);
1255 39714360 : if (error)
1256 0 : goto out_error;
1257 :
1258 39714360 : advloop:
1259 72017208 : (*agbno) += ext.rc_blockcount;
1260 72017208 : (*aglen) -= ext.rc_blockcount;
1261 : }
1262 :
1263 : return error;
1264 78 : out_error:
1265 156 : trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1266 78 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1267 78 : return error;
1268 : }
1269 :
1270 : /* Adjust the reference count of a range of AG blocks. */
1271 : STATIC int
1272 105304976 : xfs_refcount_adjust(
1273 : struct xfs_btree_cur *cur,
1274 : xfs_agblock_t *agbno,
1275 : xfs_extlen_t *aglen,
1276 : enum xfs_refc_adjust_op adj)
1277 : {
1278 105304976 : bool shape_changed;
1279 105304976 : int shape_changes = 0;
1280 105304976 : int error;
1281 :
1282 105304976 : if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
1283 50340379 : trace_xfs_refcount_increase(cur->bc_mp,
1284 50340379 : cur->bc_ag.pag->pag_agno, *agbno, *aglen);
1285 : else
1286 54964597 : trace_xfs_refcount_decrease(cur->bc_mp,
1287 54964597 : cur->bc_ag.pag->pag_agno, *agbno, *aglen);
1288 :
1289 : /*
1290 : * Ensure that no rcextents cross the boundary of the adjustment range.
1291 : */
1292 105304999 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
1293 : *agbno, &shape_changed);
1294 105304971 : if (error)
1295 69 : goto out_error;
1296 105304902 : if (shape_changed)
1297 9388558 : shape_changes++;
1298 :
1299 105304902 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
1300 105304902 : *agbno + *aglen, &shape_changed);
1301 105304935 : if (error)
1302 10 : goto out_error;
1303 105304925 : if (shape_changed)
1304 8519586 : shape_changes++;
1305 :
1306 : /*
1307 : * Try to merge with the left or right extents of the range.
1308 : */
1309 105304925 : error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
1310 : agbno, aglen, adj, &shape_changed);
1311 105304930 : if (error)
1312 4 : goto out_error;
1313 105304926 : if (shape_changed)
1314 11093053 : shape_changes++;
1315 105304926 : if (shape_changes)
1316 23895281 : cur->bc_ag.refc.shape_changes++;
1317 :
1318 : /* Now that we've taken care of the ends, adjust the middle extents */
1319 105304926 : error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
1320 105304860 : if (error)
1321 78 : goto out_error;
1322 :
1323 : return 0;
1324 :
1325 161 : out_error:
1326 322 : trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1327 161 : error, _RET_IP_);
1328 161 : return error;
1329 : }
1330 :
1331 : /* Clean up after calling xfs_refcount_finish_one. */
1332 : void
1333 107589022 : xfs_refcount_finish_one_cleanup(
1334 : struct xfs_trans *tp,
1335 : struct xfs_btree_cur *rcur,
1336 : int error)
1337 : {
1338 107638646 : struct xfs_buf *agbp;
1339 :
1340 107589022 : if (rcur == NULL)
1341 : return;
1342 107638417 : agbp = rcur->bc_ag.agbp;
1343 107588793 : xfs_btree_del_cursor(rcur, error);
1344 107588819 : if (error)
1345 552 : xfs_trans_brelse(tp, agbp);
1346 : }
1347 :
1348 : /*
1349 : * Set up a continuation a deferred refcount operation by updating the intent.
1350 : * Checks to make sure we're not going to run off the end of the AG.
1351 : */
1352 : static inline int
1353 391 : xfs_refcount_continue_op(
1354 : struct xfs_btree_cur *cur,
1355 : struct xfs_refcount_intent *ri,
1356 : xfs_agblock_t new_agbno)
1357 : {
1358 391 : struct xfs_mount *mp = cur->bc_mp;
1359 391 : struct xfs_perag *pag = cur->bc_ag.pag;
1360 :
1361 391 : if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
1362 : ri->ri_blockcount))) {
1363 0 : xfs_btree_mark_sick(cur);
1364 0 : return -EFSCORRUPTED;
1365 : }
1366 :
1367 391 : ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
1368 :
1369 391 : ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
1370 391 : ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
1371 :
1372 : return 0;
1373 : }
1374 :
1375 : /*
1376 : * Process one of the deferred refcount operations. We pass back the
1377 : * btree cursor to maintain our lock on the btree between calls.
1378 : * This saves time and eliminates a buffer deadlock between the
1379 : * superblock and the AGF because we'll always grab them in the same
1380 : * order.
1381 : */
1382 : int
1383 107874027 : xfs_refcount_finish_one(
1384 : struct xfs_trans *tp,
1385 : struct xfs_refcount_intent *ri,
1386 : struct xfs_btree_cur **pcur)
1387 : {
1388 107874027 : struct xfs_mount *mp = tp->t_mountp;
1389 107874027 : struct xfs_btree_cur *rcur;
1390 107874027 : struct xfs_buf *agbp = NULL;
1391 107874027 : int error = 0;
1392 107874027 : xfs_agblock_t bno;
1393 107874027 : unsigned long nr_ops = 0;
1394 107874027 : int shape_changes = 0;
1395 :
1396 107874027 : bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);
1397 :
1398 107874027 : trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
1399 107874027 : ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
1400 : ri->ri_blockcount);
1401 :
1402 107874472 : if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
1403 : return -EIO;
1404 :
1405 : /*
1406 : * If we haven't gotten a cursor or the cursor AG doesn't match
1407 : * the startblock, get one now.
1408 : */
1409 107874452 : rcur = *pcur;
1410 107874452 : if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
1411 49624 : nr_ops = rcur->bc_ag.refc.nr_ops;
1412 49624 : shape_changes = rcur->bc_ag.refc.shape_changes;
1413 49624 : xfs_refcount_finish_one_cleanup(tp, rcur, 0);
1414 49624 : rcur = NULL;
1415 49624 : *pcur = NULL;
1416 : }
1417 107874452 : if (rcur == NULL) {
1418 107638642 : error = xfs_alloc_read_agf(ri->ri_pag, tp,
1419 : XFS_ALLOC_FLAG_FREEING, &agbp);
1420 107638675 : if (error)
1421 : return error;
1422 :
1423 107638444 : rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag);
1424 107638421 : rcur->bc_ag.refc.nr_ops = nr_ops;
1425 107638421 : rcur->bc_ag.refc.shape_changes = shape_changes;
1426 : }
1427 107874231 : *pcur = rcur;
1428 :
1429 107874231 : switch (ri->ri_type) {
1430 50340379 : case XFS_REFCOUNT_INCREASE:
1431 50340379 : error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
1432 : XFS_REFCOUNT_ADJUST_INCREASE);
1433 50340379 : if (error)
1434 : return error;
1435 50340280 : if (ri->ri_blockcount > 0)
1436 42 : error = xfs_refcount_continue_op(rcur, ri, bno);
1437 : break;
1438 54964610 : case XFS_REFCOUNT_DECREASE:
1439 54964610 : error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
1440 : XFS_REFCOUNT_ADJUST_DECREASE);
1441 54964571 : if (error)
1442 : return error;
1443 54964509 : if (ri->ri_blockcount > 0)
1444 349 : error = xfs_refcount_continue_op(rcur, ri, bno);
1445 : break;
1446 338412 : case XFS_REFCOUNT_ALLOC_COW:
1447 338412 : error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
1448 338423 : if (error)
1449 : return error;
1450 338423 : ri->ri_blockcount = 0;
1451 338423 : break;
1452 2230830 : case XFS_REFCOUNT_FREE_COW:
1453 2230830 : error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
1454 2230830 : if (error)
1455 : return error;
1456 2230830 : ri->ri_blockcount = 0;
1457 2230830 : break;
1458 0 : default:
1459 0 : ASSERT(0);
1460 0 : return -EFSCORRUPTED;
1461 : }
1462 107874042 : if (!error && ri->ri_blockcount > 0)
1463 391 : trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno,
1464 391 : ri->ri_type, bno, ri->ri_blockcount);
1465 : return error;
1466 : }
1467 :
1468 : /*
1469 : * Record a refcount intent for later processing.
1470 : */
1471 : static void
1472 107873688 : __xfs_refcount_add(
1473 : struct xfs_trans *tp,
1474 : enum xfs_refcount_intent_type type,
1475 : xfs_fsblock_t startblock,
1476 : xfs_extlen_t blockcount)
1477 : {
1478 107873688 : struct xfs_refcount_intent *ri;
1479 :
1480 107873688 : trace_xfs_refcount_defer(tp->t_mountp,
1481 107873688 : XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
1482 : type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
1483 : blockcount);
1484 :
1485 107873720 : ri = kmem_cache_alloc(xfs_refcount_intent_cache,
1486 : GFP_NOFS | __GFP_NOFAIL);
1487 107873616 : INIT_LIST_HEAD(&ri->ri_list);
1488 107873616 : ri->ri_type = type;
1489 107873616 : ri->ri_startblock = startblock;
1490 107873616 : ri->ri_blockcount = blockcount;
1491 :
1492 107873616 : xfs_refcount_update_get_group(tp->t_mountp, ri);
1493 107873335 : xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
1494 107873901 : }
1495 :
1496 : /*
1497 : * Increase the reference count of the blocks backing a file's extent.
1498 : */
1499 : void
1500 50340213 : xfs_refcount_increase_extent(
1501 : struct xfs_trans *tp,
1502 : struct xfs_bmbt_irec *PREV)
1503 : {
1504 50340213 : if (!xfs_has_reflink(tp->t_mountp))
1505 : return;
1506 :
1507 50340200 : __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
1508 50340200 : PREV->br_blockcount);
1509 : }
1510 :
1511 : /*
1512 : * Decrease the reference count of the blocks backing a file's extent.
1513 : */
1514 : void
1515 54964542 : xfs_refcount_decrease_extent(
1516 : struct xfs_trans *tp,
1517 : struct xfs_bmbt_irec *PREV)
1518 : {
1519 54964542 : if (!xfs_has_reflink(tp->t_mountp))
1520 : return;
1521 :
1522 54964572 : __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
1523 54964572 : PREV->br_blockcount);
1524 : }
1525 :
1526 : /*
1527 : * Given an AG extent, find the lowest-numbered run of shared blocks
1528 : * within that range and return the range in fbno/flen. If
1529 : * find_end_of_shared is set, return the longest contiguous extent of
1530 : * shared blocks; if not, just return the first extent we find. If no
1531 : * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
1532 : * and 0, respectively.
1533 : */
1534 : int
1535 1001102271 : xfs_refcount_find_shared(
1536 : struct xfs_btree_cur *cur,
1537 : xfs_agblock_t agbno,
1538 : xfs_extlen_t aglen,
1539 : xfs_agblock_t *fbno,
1540 : xfs_extlen_t *flen,
1541 : bool find_end_of_shared)
1542 : {
1543 1001102271 : struct xfs_refcount_irec tmp;
1544 1001102271 : int i;
1545 1001102271 : int have;
1546 1001102271 : int error;
1547 :
1548 1001102271 : trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1549 : agbno, aglen);
1550 :
1551 : /* By default, skip the whole range */
1552 1001131460 : *fbno = NULLAGBLOCK;
1553 1001131460 : *flen = 0;
1554 :
1555 : /* Try to find a refcount extent that crosses the start */
1556 1001131460 : error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
1557 : &have);
1558 1001189041 : if (error)
1559 19 : goto out_error;
1560 1001189022 : if (!have) {
1561 : /* No left extent, look at the next one */
1562 57003518 : error = xfs_btree_increment(cur, 0, &have);
1563 57000879 : if (error)
1564 0 : goto out_error;
1565 57000879 : if (!have)
1566 22271185 : goto done;
1567 : }
1568 978915198 : error = xfs_refcount_get_rec(cur, &tmp, &i);
1569 978936763 : if (error)
1570 0 : goto out_error;
1571 978936763 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1572 0 : xfs_btree_mark_sick(cur);
1573 0 : error = -EFSCORRUPTED;
1574 0 : goto out_error;
1575 : }
1576 978936763 : if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
1577 321710 : goto done;
1578 :
1579 : /* If the extent ends before the start, look at the next one */
1580 978615053 : if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
1581 899393813 : error = xfs_btree_increment(cur, 0, &have);
1582 899344598 : if (error)
1583 0 : goto out_error;
1584 899344598 : if (!have)
1585 12416797 : goto done;
1586 886927801 : error = xfs_refcount_get_rec(cur, &tmp, &i);
1587 886928767 : if (error)
1588 0 : goto out_error;
1589 886928767 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1590 0 : xfs_btree_mark_sick(cur);
1591 0 : error = -EFSCORRUPTED;
1592 0 : goto out_error;
1593 : }
1594 886928767 : if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
1595 27264399 : goto done;
1596 : }
1597 :
1598 : /* If the extent starts after the range we want, bail out */
1599 938885608 : if (tmp.rc_startblock >= agbno + aglen)
1600 892820771 : goto done;
1601 :
1602 : /* We found the start of a shared extent! */
1603 46064837 : if (tmp.rc_startblock < agbno) {
1604 311899 : tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
1605 311899 : tmp.rc_startblock = agbno;
1606 : }
1607 :
1608 46064837 : *fbno = tmp.rc_startblock;
1609 46064837 : *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
1610 46064837 : if (!find_end_of_shared)
1611 45175070 : goto done;
1612 :
1613 : /* Otherwise, find the end of this shared extent */
1614 956357 : while (*fbno + *flen < agbno + aglen) {
1615 90676 : error = xfs_btree_increment(cur, 0, &have);
1616 90715 : if (error)
1617 0 : goto out_error;
1618 90715 : if (!have)
1619 : break;
1620 88677 : error = xfs_refcount_get_rec(cur, &tmp, &i);
1621 88677 : if (error)
1622 0 : goto out_error;
1623 88677 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1624 0 : xfs_btree_mark_sick(cur);
1625 0 : error = -EFSCORRUPTED;
1626 0 : goto out_error;
1627 : }
1628 88677 : if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
1629 87785 : tmp.rc_startblock >= agbno + aglen ||
1630 69102 : tmp.rc_startblock != *fbno + *flen)
1631 : break;
1632 66590 : *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
1633 : }
1634 :
1635 889806 : done:
1636 1001159738 : trace_xfs_refcount_find_shared_result(cur->bc_mp,
1637 1001159738 : cur->bc_ag.pag->pag_agno, *fbno, *flen);
1638 :
1639 1001126679 : out_error:
1640 1001126679 : if (error)
1641 19 : trace_xfs_refcount_find_shared_error(cur->bc_mp,
1642 19 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1643 1001126679 : return error;
1644 : }
1645 :
1646 : /*
1647 : * Recovering CoW Blocks After a Crash
1648 : *
1649 : * Due to the way that the copy on write mechanism works, there's a window of
1650 : * opportunity in which we can lose track of allocated blocks during a crash.
1651 : * Because CoW uses delayed allocation in the in-core CoW fork, writeback
1652 : * causes blocks to be allocated and stored in the CoW fork. The blocks are
1653 : * no longer in the free space btree but are not otherwise recorded anywhere
1654 : * until the write completes and the blocks are mapped into the file. A crash
1655 : * in between allocation and remapping results in the replacement blocks being
1656 : * lost. This situation is exacerbated by the CoW extent size hint because
1657 : * allocations can hang around for long time.
1658 : *
1659 : * However, there is a place where we can record these allocations before they
1660 : * become mappings -- the reference count btree. The btree does not record
1661 : * extents with refcount == 1, so we can record allocations with a refcount of
1662 : * 1. Blocks being used for CoW writeout cannot be shared, so there should be
1663 : * no conflict with shared block records. These mappings should be created
1664 : * when we allocate blocks to the CoW fork and deleted when they're removed
1665 : * from the CoW fork.
1666 : *
1667 : * Minor nit: records for in-progress CoW allocations and records for shared
1668 : * extents must never be merged, to preserve the property that (except for CoW
1669 : * allocations) there are no refcount btree entries with refcount == 1. The
1670 : * only time this could potentially happen is when unsharing a block that's
1671 : * adjacent to CoW allocations, so we must be careful to avoid this.
1672 : *
1673 : * At mount time we recover lost CoW allocations by searching the refcount
1674 : * btree for these refcount == 1 mappings. These represent CoW allocations
1675 : * that were in progress at the time the filesystem went down, so we can free
1676 : * them to get the space back.
1677 : *
1678 : * This mechanism is superior to creating EFIs for unmapped CoW extents for
1679 : * several reasons -- first, EFIs pin the tail of the log and would have to be
1680 : * periodically relogged to avoid filling up the log. Second, CoW completions
1681 : * will have to file an EFD and create new EFIs for whatever remains in the
1682 : * CoW fork; this partially takes care of (1) but extent-size reservations
1683 : * will have to periodically relog even if there's no writeout in progress.
1684 : * This can happen if the CoW extent size hint is set, which you really want.
1685 : * Third, EFIs cannot currently be automatically relogged into newer
1686 : * transactions to advance the log tail. Fourth, stuffing the log full of
1687 : * EFIs places an upper bound on the number of CoW allocations that can be
1688 : * held filesystem-wide at any given time. Recording them in the refcount
1689 : * btree doesn't require us to maintain any state in memory and doesn't pin
1690 : * the log.
1691 : */
1692 : /*
1693 : * Adjust the refcounts of CoW allocations. These allocations are "magic"
1694 : * in that they're not referenced anywhere else in the filesystem, so we
1695 : * stash them in the refcount btree with a refcount of 1 until either file
1696 : * remapping (or CoW cancellation) happens.
1697 : */
1698 : STATIC int
1699 2569251 : xfs_refcount_adjust_cow_extents(
1700 : struct xfs_btree_cur *cur,
1701 : xfs_agblock_t agbno,
1702 : xfs_extlen_t aglen,
1703 : enum xfs_refc_adjust_op adj)
1704 : {
1705 2569251 : struct xfs_refcount_irec ext, tmp;
1706 2569251 : int error;
1707 2569251 : int found_rec, found_tmp;
1708 :
1709 2569251 : if (aglen == 0)
1710 : return 0;
1711 :
1712 : /* Find any overlapping refcount records */
1713 2440951 : error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
1714 : &found_rec);
1715 2440950 : if (error)
1716 0 : goto out_error;
1717 2440950 : error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1718 2440950 : if (error)
1719 0 : goto out_error;
1720 2440950 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
1721 : ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
1722 0 : xfs_btree_mark_sick(cur);
1723 0 : error = -EFSCORRUPTED;
1724 0 : goto out_error;
1725 : }
1726 2440950 : if (!found_rec) {
1727 120688 : ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
1728 120688 : ext.rc_blockcount = 0;
1729 120688 : ext.rc_refcount = 0;
1730 120688 : ext.rc_domain = XFS_REFC_DOMAIN_COW;
1731 : }
1732 :
1733 2440950 : switch (adj) {
1734 210120 : case XFS_REFCOUNT_ADJUST_COW_ALLOC:
1735 : /* Adding a CoW reservation, there should be nothing here. */
1736 210120 : if (XFS_IS_CORRUPT(cur->bc_mp,
1737 : agbno + aglen > ext.rc_startblock)) {
1738 0 : xfs_btree_mark_sick(cur);
1739 0 : error = -EFSCORRUPTED;
1740 0 : goto out_error;
1741 : }
1742 :
1743 210120 : tmp.rc_startblock = agbno;
1744 210120 : tmp.rc_blockcount = aglen;
1745 210120 : tmp.rc_refcount = 1;
1746 210120 : tmp.rc_domain = XFS_REFC_DOMAIN_COW;
1747 :
1748 210120 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1749 210120 : cur->bc_ag.pag->pag_agno, &tmp);
1750 :
1751 210118 : error = xfs_refcount_insert(cur, &tmp,
1752 : &found_tmp);
1753 210119 : if (error)
1754 0 : goto out_error;
1755 210119 : if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
1756 0 : xfs_btree_mark_sick(cur);
1757 0 : error = -EFSCORRUPTED;
1758 0 : goto out_error;
1759 : }
1760 : break;
1761 2230830 : case XFS_REFCOUNT_ADJUST_COW_FREE:
1762 : /* Removing a CoW reservation, there should be one extent. */
1763 2230830 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
1764 0 : xfs_btree_mark_sick(cur);
1765 0 : error = -EFSCORRUPTED;
1766 0 : goto out_error;
1767 : }
1768 2230830 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
1769 0 : xfs_btree_mark_sick(cur);
1770 0 : error = -EFSCORRUPTED;
1771 0 : goto out_error;
1772 : }
1773 2230830 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
1774 0 : xfs_btree_mark_sick(cur);
1775 0 : error = -EFSCORRUPTED;
1776 0 : goto out_error;
1777 : }
1778 :
1779 2230830 : ext.rc_refcount = 0;
1780 2230830 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1781 2230830 : cur->bc_ag.pag->pag_agno, &ext);
1782 2230830 : error = xfs_refcount_delete(cur, &found_rec);
1783 2230830 : if (error)
1784 0 : goto out_error;
1785 2230830 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1786 0 : xfs_btree_mark_sick(cur);
1787 0 : error = -EFSCORRUPTED;
1788 0 : goto out_error;
1789 : }
1790 : break;
1791 0 : default:
1792 0 : ASSERT(0);
1793 : }
1794 :
1795 : return error;
1796 0 : out_error:
1797 0 : trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1798 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1799 0 : return error;
1800 : }
1801 :
1802 : /*
1803 : * Add or remove refcount btree entries for CoW reservations.
1804 : */
1805 : STATIC int
1806 2569236 : xfs_refcount_adjust_cow(
1807 : struct xfs_btree_cur *cur,
1808 : xfs_agblock_t agbno,
1809 : xfs_extlen_t aglen,
1810 : enum xfs_refc_adjust_op adj)
1811 : {
1812 2569236 : bool shape_changed;
1813 2569236 : int error;
1814 :
1815 : /*
1816 : * Ensure that no rcextents cross the boundary of the adjustment range.
1817 : */
1818 2569236 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
1819 : agbno, &shape_changed);
1820 2569239 : if (error)
1821 0 : goto out_error;
1822 :
1823 2569239 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
1824 : agbno + aglen, &shape_changed);
1825 2569251 : if (error)
1826 0 : goto out_error;
1827 :
1828 : /*
1829 : * Try to merge with the left or right extents of the range.
1830 : */
1831 2569251 : error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
1832 : &aglen, adj, &shape_changed);
1833 2569248 : if (error)
1834 0 : goto out_error;
1835 :
1836 : /* Now that we've taken care of the ends, adjust the middle extents */
1837 2569248 : error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
1838 2569244 : if (error)
1839 0 : goto out_error;
1840 :
1841 : return 0;
1842 :
1843 0 : out_error:
1844 0 : trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1845 0 : error, _RET_IP_);
1846 0 : return error;
1847 : }
1848 :
1849 : /*
1850 : * Record a CoW allocation in the refcount btree.
1851 : */
1852 : STATIC int
1853 338422 : __xfs_refcount_cow_alloc(
1854 : struct xfs_btree_cur *rcur,
1855 : xfs_agblock_t agbno,
1856 : xfs_extlen_t aglen)
1857 : {
1858 338422 : trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1859 : agbno, aglen);
1860 :
1861 : /* Add refcount btree reservation */
1862 338422 : return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1863 : XFS_REFCOUNT_ADJUST_COW_ALLOC);
1864 : }
1865 :
1866 : /*
1867 : * Remove a CoW allocation from the refcount btree.
1868 : */
1869 : STATIC int
1870 2230830 : __xfs_refcount_cow_free(
1871 : struct xfs_btree_cur *rcur,
1872 : xfs_agblock_t agbno,
1873 : xfs_extlen_t aglen)
1874 : {
1875 2230830 : trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1876 : agbno, aglen);
1877 :
1878 : /* Remove refcount btree reservation */
1879 2230830 : return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1880 : XFS_REFCOUNT_ADJUST_COW_FREE);
1881 : }
1882 :
1883 : /* Record a CoW staging extent in the refcount btree. */
1884 : void
1885 338402 : xfs_refcount_alloc_cow_extent(
1886 : struct xfs_trans *tp,
1887 : xfs_fsblock_t fsb,
1888 : xfs_extlen_t len)
1889 : {
1890 338402 : struct xfs_mount *mp = tp->t_mountp;
1891 :
1892 338402 : if (!xfs_has_reflink(mp))
1893 : return;
1894 :
1895 338391 : __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
1896 :
1897 : /* Add rmap entry */
1898 338407 : xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1899 : XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1900 : }
1901 :
1902 : /* Forget a CoW staging event in the refcount btree. */
1903 : void
1904 2230826 : xfs_refcount_free_cow_extent(
1905 : struct xfs_trans *tp,
1906 : xfs_fsblock_t fsb,
1907 : xfs_extlen_t len)
1908 : {
1909 2230826 : struct xfs_mount *mp = tp->t_mountp;
1910 :
1911 2230826 : if (!xfs_has_reflink(mp))
1912 : return;
1913 :
1914 : /* Remove rmap entry */
1915 2230826 : xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1916 : XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1917 2230826 : __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
1918 : }
1919 :
1920 : struct xfs_refcount_recovery {
1921 : struct list_head rr_list;
1922 : struct xfs_refcount_irec rr_rrec;
1923 : };
1924 :
1925 : /* Stuff an extent on the recovery list. */
1926 : STATIC int
1927 269875 : xfs_refcount_recover_extent(
1928 : struct xfs_btree_cur *cur,
1929 : const union xfs_btree_rec *rec,
1930 : void *priv)
1931 : {
1932 269875 : struct list_head *debris = priv;
1933 269875 : struct xfs_refcount_recovery *rr;
1934 :
1935 269875 : if (XFS_IS_CORRUPT(cur->bc_mp,
1936 : be32_to_cpu(rec->refc.rc_refcount) != 1)) {
1937 0 : xfs_btree_mark_sick(cur);
1938 0 : return -EFSCORRUPTED;
1939 : }
1940 :
1941 269875 : rr = kmalloc(sizeof(struct xfs_refcount_recovery),
1942 : GFP_KERNEL | __GFP_NOFAIL);
1943 269875 : INIT_LIST_HEAD(&rr->rr_list);
1944 269875 : xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
1945 :
1946 269875 : if (xfs_refcount_check_irec(cur, &rr->rr_rrec) != NULL ||
1947 269875 : XFS_IS_CORRUPT(cur->bc_mp,
1948 : rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
1949 0 : xfs_btree_mark_sick(cur);
1950 0 : kfree(rr);
1951 0 : return -EFSCORRUPTED;
1952 : }
1953 :
1954 269875 : list_add_tail(&rr->rr_list, debris);
1955 269875 : return 0;
1956 : }
1957 :
1958 : /* Find and remove leftover CoW reservations. */
1959 : int
1960 45082 : xfs_refcount_recover_cow_leftovers(
1961 : struct xfs_mount *mp,
1962 : struct xfs_perag *pag)
1963 : {
1964 45082 : struct xfs_trans *tp;
1965 45082 : struct xfs_btree_cur *cur;
1966 45082 : struct xfs_buf *agbp;
1967 45082 : struct xfs_refcount_recovery *rr, *n;
1968 45082 : struct list_head debris;
1969 45082 : union xfs_btree_irec low = {
1970 : .rc.rc_domain = XFS_REFC_DOMAIN_COW,
1971 : };
1972 45082 : union xfs_btree_irec high = {
1973 : .rc.rc_domain = XFS_REFC_DOMAIN_COW,
1974 : .rc.rc_startblock = -1U,
1975 : };
1976 45082 : xfs_fsblock_t fsb;
1977 45082 : int error;
1978 :
1979 : /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */
1980 45082 : BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);
1981 45082 : if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS)
1982 : return -EOPNOTSUPP;
1983 :
1984 45082 : INIT_LIST_HEAD(&debris);
1985 :
1986 : /*
1987 : * In this first part, we use an empty transaction to gather up
1988 : * all the leftover CoW extents so that we can subsequently
1989 : * delete them. The empty transaction is used to avoid
1990 : * a buffer lock deadlock if there happens to be a loop in the
1991 : * refcountbt because we're allowed to re-grab a buffer that is
1992 : * already attached to our transaction. When we're done
1993 : * recording the CoW debris we cancel the (empty) transaction
1994 : * and everything goes away cleanly.
1995 : */
1996 45082 : error = xfs_trans_alloc_empty(mp, &tp);
1997 45082 : if (error)
1998 : return error;
1999 :
2000 45082 : error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
2001 45082 : if (error)
2002 4 : goto out_trans;
2003 45078 : cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
2004 :
2005 : /* Find all the leftover CoW staging extents. */
2006 45078 : error = xfs_btree_query_range(cur, &low, &high,
2007 : xfs_refcount_recover_extent, &debris);
2008 45078 : xfs_btree_del_cursor(cur, error);
2009 45078 : xfs_trans_brelse(tp, agbp);
2010 45078 : xfs_trans_cancel(tp);
2011 45078 : if (error)
2012 4 : goto out_free;
2013 :
2014 : /* Now iterate the list to free the leftovers */
2015 314949 : list_for_each_entry_safe(rr, n, &debris, rr_list) {
2016 : /* Set up transaction. */
2017 269875 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
2018 269875 : if (error)
2019 0 : goto out_free;
2020 :
2021 269875 : trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
2022 : &rr->rr_rrec);
2023 :
2024 : /* Free the orphan record */
2025 269875 : fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
2026 : rr->rr_rrec.rc_startblock);
2027 269875 : xfs_refcount_free_cow_extent(tp, fsb,
2028 : rr->rr_rrec.rc_blockcount);
2029 :
2030 : /* Free the block. */
2031 269875 : error = xfs_free_extent_later(tp, fsb,
2032 269875 : rr->rr_rrec.rc_blockcount, NULL,
2033 : XFS_AG_RESV_NONE);
2034 269875 : if (error)
2035 0 : goto out_trans;
2036 :
2037 269875 : error = xfs_trans_commit(tp);
2038 269875 : if (error)
2039 0 : goto out_free;
2040 :
2041 269875 : list_del(&rr->rr_list);
2042 269875 : kfree(rr);
2043 : }
2044 :
2045 : return error;
2046 4 : out_trans:
2047 4 : xfs_trans_cancel(tp);
2048 8 : out_free:
2049 : /* Free the leftover list */
2050 8 : list_for_each_entry_safe(rr, n, &debris, rr_list) {
2051 0 : list_del(&rr->rr_list);
2052 0 : kfree(rr);
2053 : }
2054 : return error;
2055 : }
2056 :
2057 : /*
2058 : * Scan part of the keyspace of the refcount records and tell us if the area
2059 : * has no records, is fully mapped by records, or is partially filled.
2060 : */
2061 : int
2062 1532837811 : xfs_refcount_has_records(
2063 : struct xfs_btree_cur *cur,
2064 : enum xfs_refc_domain domain,
2065 : xfs_agblock_t bno,
2066 : xfs_extlen_t len,
2067 : enum xbtree_recpacking *outcome)
2068 : {
2069 1532837811 : union xfs_btree_irec low;
2070 1532837811 : union xfs_btree_irec high;
2071 :
2072 1532837811 : memset(&low, 0, sizeof(low));
2073 1532837811 : low.rc.rc_startblock = bno;
2074 1532837811 : memset(&high, 0xFF, sizeof(high));
2075 1532837811 : high.rc.rc_startblock = bno + len - 1;
2076 1532837811 : low.rc.rc_domain = high.rc.rc_domain = domain;
2077 :
2078 1532837811 : return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
2079 : }
2080 :
2081 : struct xfs_refcount_query_range_info {
2082 : xfs_refcount_query_range_fn fn;
2083 : void *priv;
2084 : };
2085 :
2086 : /* Format btree record and pass to our callback. */
2087 : STATIC int
2088 5107 : xfs_refcount_query_range_helper(
2089 : struct xfs_btree_cur *cur,
2090 : const union xfs_btree_rec *rec,
2091 : void *priv)
2092 : {
2093 5107 : struct xfs_refcount_query_range_info *query = priv;
2094 5107 : struct xfs_refcount_irec irec;
2095 5107 : xfs_failaddr_t fa;
2096 :
2097 5107 : xfs_refcount_btrec_to_irec(rec, &irec);
2098 5107 : fa = xfs_refcount_check_irec(cur, &irec);
2099 5107 : if (fa)
2100 0 : return xfs_refcount_complain_bad_rec(cur, fa, &irec);
2101 :
2102 5107 : return query->fn(cur, &irec, query->priv);
2103 : }
2104 :
2105 : /* Find all refcount records between two keys. */
2106 : int
2107 7291 : xfs_refcount_query_range(
2108 : struct xfs_btree_cur *cur,
2109 : const struct xfs_refcount_irec *low_rec,
2110 : const struct xfs_refcount_irec *high_rec,
2111 : xfs_refcount_query_range_fn fn,
2112 : void *priv)
2113 : {
2114 7291 : union xfs_btree_irec low_brec = { .rc = *low_rec };
2115 7291 : union xfs_btree_irec high_brec = { .rc = *high_rec };
2116 7291 : struct xfs_refcount_query_range_info query = { .priv = priv, .fn = fn };
2117 :
2118 7291 : return xfs_btree_query_range(cur, &low_brec, &high_brec,
2119 : xfs_refcount_query_range_helper, &query);
2120 : }
2121 :
2122 : int __init
2123 12 : xfs_refcount_intent_init_cache(void)
2124 : {
2125 12 : xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
2126 : sizeof(struct xfs_refcount_intent),
2127 : 0, 0, NULL);
2128 :
2129 12 : return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
2130 : }
2131 :
2132 : void
2133 12 : xfs_refcount_intent_destroy_cache(void)
2134 : {
2135 12 : kmem_cache_destroy(xfs_refcount_intent_cache);
2136 12 : xfs_refcount_intent_cache = NULL;
2137 12 : }
|