Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2010 Red Hat, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_shared.h"
8 : #include "xfs_format.h"
9 : #include "xfs_log_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_alloc_btree.h"
14 : #include "xfs_alloc.h"
15 : #include "xfs_discard.h"
16 : #include "xfs_error.h"
17 : #include "xfs_extent_busy.h"
18 : #include "xfs_trace.h"
19 : #include "xfs_log.h"
20 : #include "xfs_ag.h"
21 : #include "xfs_health.h"
22 : #include "xfs_rtbitmap.h"
23 :
24 : /*
25 : * For trim functions that support it, cycle the metadata locks about once per
26 : * second (HZ jiffies) so that other parts of the filesystem are not starved.
27 : */
28 : #define XFS_TRIM_RELAX_INTERVAL (HZ)
29 :
30 : /* Trim the free space in this AG by block number. */
31 : static inline int
32 293141 : xfs_trim_ag_bybno(
33 : struct xfs_perag *pag,
34 : struct xfs_buf **agbpp,
35 : xfs_daddr_t start,
36 : xfs_daddr_t end,
37 : xfs_daddr_t minlen,
38 : uint64_t *blocks_trimmed)
39 : {
40 293141 : struct xfs_mount *mp = pag->pag_mount;
41 293141 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
42 293141 : struct xfs_btree_cur *cur;
43 293141 : struct xfs_agf *agf = (*agbpp)->b_addr;
44 293141 : xfs_daddr_t end_daddr;
45 293141 : xfs_agnumber_t agno = pag->pag_agno;
46 293141 : xfs_agblock_t start_agbno;
47 293141 : xfs_agblock_t end_agbno;
48 293141 : xfs_extlen_t minlen_fsb = XFS_BB_TO_FSB(mp, minlen);
49 293141 : unsigned long last_relax = jiffies;
50 293141 : int i;
51 293141 : int error;
52 :
53 293141 : start = max(start, XFS_AGB_TO_DADDR(mp, agno, 0));
54 293141 : start_agbno = xfs_daddr_to_agbno(mp, start);
55 :
56 293141 : end_daddr = XFS_AGB_TO_DADDR(mp, agno, be32_to_cpu(agf->agf_length));
57 293141 : end = min(end, end_daddr - 1);
58 293141 : end_agbno = xfs_daddr_to_agbno(mp, end);
59 :
60 293141 : cur = xfs_allocbt_init_cursor(mp, NULL, *agbpp, pag, XFS_BTNUM_BNO);
61 :
62 293141 : error = xfs_alloc_lookup_le(cur, start_agbno, 0, &i);
63 293141 : if (error)
64 0 : goto out_del_cursor;
65 :
66 : /*
67 : * If we didn't find anything at or below start_agbno, increment the
68 : * cursor to see if there's another record above it.
69 : */
70 293141 : if (!i) {
71 263094 : error = xfs_btree_increment(cur, 0, &i);
72 263094 : if (error)
73 0 : goto out_del_cursor;
74 : }
75 :
76 : /* Loop the entire range that was asked for. */
77 28502875 : while (i) {
78 28346395 : xfs_agblock_t fbno;
79 28346395 : xfs_extlen_t flen;
80 28346395 : xfs_daddr_t dbno;
81 28346395 : xfs_extlen_t dlen;
82 :
83 28346395 : error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
84 28346395 : if (error)
85 17 : goto out_del_cursor;
86 28346395 : if (XFS_IS_CORRUPT(mp, i != 1)) {
87 0 : xfs_btree_mark_sick(cur);
88 0 : error = -EFSCORRUPTED;
89 0 : goto out_del_cursor;
90 : }
91 :
92 : /* Skip extents entirely outside of the range. */
93 28346395 : if (fbno >= end_agbno)
94 : break;
95 28209751 : if (fbno + flen < start_agbno)
96 623 : goto next_extent;
97 :
98 : /* Trim the extent returned to the range we want. */
99 28209128 : if (fbno < start_agbno) {
100 29420 : flen -= start_agbno - fbno;
101 29420 : fbno = start_agbno;
102 : }
103 28209128 : if (fbno + flen > end_agbno + 1)
104 28302 : flen = end_agbno - fbno + 1;
105 :
106 : /* Ignore too small. */
107 28209128 : if (flen < minlen_fsb) {
108 27153099 : trace_xfs_discard_toosmall(mp, agno, fbno, flen);
109 27153099 : goto next_extent;
110 : }
111 :
112 : /*
113 : * If any blocks in the range are still busy, skip the
114 : * discard and try again the next time.
115 : */
116 1056029 : if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
117 139 : trace_xfs_discard_busy(mp, agno, fbno, flen);
118 139 : goto next_extent;
119 : }
120 :
121 1055890 : trace_xfs_discard_extent(mp, agno, fbno, flen);
122 :
123 1055890 : dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
124 1055890 : dlen = XFS_FSB_TO_BB(mp, flen);
125 1055890 : error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
126 1055890 : if (error)
127 0 : goto out_del_cursor;
128 1055890 : *blocks_trimmed += flen;
129 :
130 1055890 : if (time_after(jiffies, last_relax + XFS_TRIM_RELAX_INTERVAL)) {
131 : /*
132 : * Cycle the AGF lock since we know how to pick up
133 : * where we left off.
134 : */
135 347 : trace_xfs_discard_relax(mp, agno, fbno, flen);
136 347 : xfs_btree_del_cursor(cur, error);
137 347 : xfs_buf_relse(*agbpp);
138 :
139 347 : error = xfs_alloc_read_agf(pag, NULL, 0, agbpp);
140 347 : if (error)
141 0 : return error;
142 :
143 347 : cur = xfs_allocbt_init_cursor(mp, NULL, *agbpp, pag,
144 : XFS_BTNUM_BNO);
145 347 : error = xfs_alloc_lookup_ge(cur, fbno + flen, 0, &i);
146 347 : last_relax = jiffies;
147 : } else {
148 1055543 : next_extent:
149 28209404 : error = xfs_btree_increment(cur, 0, &i);
150 : }
151 28209751 : if (error)
152 0 : goto out_del_cursor;
153 :
154 28209751 : if (fatal_signal_pending(current)) {
155 17 : error = -ERESTARTSYS;
156 17 : goto out_del_cursor;
157 : }
158 : }
159 :
160 156480 : out_del_cursor:
161 293141 : xfs_btree_del_cursor(cur, error);
162 293141 : return error;
163 : }
164 :
165 : /* Trim the free space in this AG by length. */
166 : static inline int
167 15622 : xfs_trim_ag_bylen(
168 : struct xfs_perag *pag,
169 : struct xfs_buf *agbp,
170 : xfs_daddr_t start,
171 : xfs_daddr_t end,
172 : xfs_daddr_t minlen,
173 : uint64_t *blocks_trimmed)
174 : {
175 15622 : struct xfs_mount *mp = pag->pag_mount;
176 15622 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
177 15622 : struct xfs_btree_cur *cur;
178 15622 : struct xfs_agf *agf = agbp->b_addr;
179 15622 : int error;
180 15622 : int i;
181 :
182 15622 : cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT);
183 :
184 : /*
185 : * Look up the longest btree in the AGF and start with it.
186 : */
187 15621 : error = xfs_alloc_lookup_ge(cur, 0, be32_to_cpu(agf->agf_longest), &i);
188 15621 : if (error)
189 0 : goto out_del_cursor;
190 :
191 : /*
192 : * Loop until we are done with all extents that are large
193 : * enough to be worth discarding.
194 : */
195 115695 : while (i) {
196 110647 : xfs_agblock_t fbno;
197 110647 : xfs_extlen_t flen;
198 110647 : xfs_daddr_t dbno;
199 110647 : xfs_extlen_t dlen;
200 :
201 110647 : error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
202 110645 : if (error)
203 : break;
204 110645 : if (XFS_IS_CORRUPT(mp, i != 1)) {
205 0 : xfs_btree_mark_sick(cur);
206 0 : error = -EFSCORRUPTED;
207 0 : break;
208 : }
209 110645 : ASSERT(flen <= be32_to_cpu(agf->agf_longest));
210 :
211 : /*
212 : * use daddr format for all range/len calculations as that is
213 : * the format the range/len variables are supplied in by
214 : * userspace.
215 : */
216 110645 : dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno);
217 110645 : dlen = XFS_FSB_TO_BB(mp, flen);
218 :
219 : /*
220 : * Too small? Give up.
221 : */
222 110645 : if (dlen < minlen) {
223 10572 : trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno,
224 : flen);
225 10572 : break;
226 : }
227 :
228 : /*
229 : * If any blocks in the range are still busy, skip the
230 : * discard and try again the next time.
231 : */
232 100073 : if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
233 181 : trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
234 181 : goto next_extent;
235 : }
236 :
237 99892 : trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen);
238 99890 : error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
239 99896 : if (error)
240 : break;
241 99896 : *blocks_trimmed += flen;
242 :
243 100077 : next_extent:
244 100077 : error = xfs_btree_decrement(cur, 0, &i);
245 100077 : if (error)
246 : break;
247 :
248 100077 : if (fatal_signal_pending(current)) {
249 : error = -ERESTARTSYS;
250 : break;
251 : }
252 : }
253 :
254 5048 : out_del_cursor:
255 15623 : xfs_btree_del_cursor(cur, error);
256 15623 : return error;
257 : }
258 :
259 : STATIC int
260 308764 : xfs_trim_ag_extents(
261 : struct xfs_perag *pag,
262 : xfs_daddr_t start,
263 : xfs_daddr_t end,
264 : xfs_daddr_t minlen,
265 : uint64_t *blocks_trimmed)
266 : {
267 308764 : struct xfs_mount *mp = pag->pag_mount;
268 308764 : struct xfs_buf *agbp;
269 308764 : struct xfs_agf *agf;
270 308764 : int error;
271 :
272 : /*
273 : * Force out the log. This means any transactions that might have freed
274 : * space before we take the AGF buffer lock are now on disk, and the
275 : * volatile disk cache is flushed.
276 : */
277 308764 : xfs_log_force(mp, XFS_LOG_SYNC);
278 :
279 308764 : error = xfs_alloc_read_agf(pag, NULL, 0, &agbp);
280 308764 : if (error)
281 : return error;
282 308764 : agf = agbp->b_addr;
283 :
284 308764 : if (start > XFS_AGB_TO_DADDR(mp, pag->pag_agno, 0) ||
285 132845 : end < XFS_AGB_TO_DADDR(mp, pag->pag_agno,
286 132845 : be32_to_cpu(agf->agf_length)) - 1) {
287 : /* Only trimming part of this AG */
288 293142 : error = xfs_trim_ag_bybno(pag, &agbp, start, end, minlen,
289 : blocks_trimmed);
290 : } else {
291 : /* Trim this entire AG */
292 15622 : error = xfs_trim_ag_bylen(pag, agbp, start, end, minlen,
293 : blocks_trimmed);
294 : }
295 :
296 308764 : xfs_buf_relse(agbp);
297 308764 : return error;
298 : }
299 :
300 : static int
301 204917 : xfs_trim_ddev_extents(
302 : struct xfs_mount *mp,
303 : xfs_daddr_t start,
304 : xfs_daddr_t end,
305 : xfs_daddr_t minlen,
306 : uint64_t *blocks_trimmed)
307 : {
308 204917 : struct xfs_perag *pag;
309 204917 : xfs_agnumber_t agno;
310 204917 : int error, last_error = 0;
311 :
312 204917 : if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
313 46167 : end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
314 :
315 204917 : agno = xfs_daddr_to_agno(mp, start);
316 513661 : for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) {
317 308764 : error = xfs_trim_ag_extents(pag, start, end, minlen,
318 : blocks_trimmed);
319 308764 : if (error) {
320 20 : last_error = error;
321 20 : if (error == -ERESTARTSYS) {
322 20 : xfs_perag_rele(pag);
323 20 : break;
324 : }
325 : }
326 : }
327 :
328 204917 : return last_error;
329 : }
330 :
331 : #ifdef CONFIG_XFS_RT
/* State handed to xfs_trim_rtdev_extent() for every free rt extent visited. */
332 : struct xfs_trim_rtdev {
333 : uint64_t *blocks_trimmed; /* running total of rt blocks discarded */
334 : xfs_rtblock_t minlen_fsb; /* free extents shorter than this many blocks are skipped */
335 : unsigned long last_relax; /* jiffies when the rtbitmap lock was last cycled */
336 : };
337 :
338 : static int
339 36486346 : xfs_trim_rtdev_extent(
340 : struct xfs_mount *mp,
341 : struct xfs_trans *tp,
342 : const struct xfs_rtalloc_rec *rec,
343 : void *priv)
344 : {
345 36486346 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_rtdev_targp);
346 36486346 : struct xfs_trim_rtdev *tr = priv;
347 36486346 : xfs_rtblock_t rbno, rlen;
348 36486346 : xfs_daddr_t dbno, dlen;
349 36486346 : int error;
350 :
351 36486346 : if (fatal_signal_pending(current))
352 : return -ERESTARTSYS;
353 :
354 36486339 : rbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
355 36486339 : rlen = xfs_rtx_to_rtb(mp, rec->ar_extcount);
356 :
357 : /* Ignore too small. */
358 36486339 : if (rlen < tr->minlen_fsb) {
359 27849136 : trace_xfs_discard_rttoosmall(mp, rbno, rlen);
360 27849136 : goto out;
361 : }
362 :
363 8637203 : trace_xfs_discard_rtextent(mp, rbno, rlen);
364 :
365 8637203 : dbno = XFS_FSB_TO_BB(mp, rbno);
366 8637203 : dlen = XFS_FSB_TO_BB(mp, rlen);
367 :
368 8637203 : error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
369 8637203 : if (error)
370 : return error;
371 :
372 8637203 : *tr->blocks_trimmed += rlen;
373 :
374 36486339 : out:
375 36486339 : if (time_after(jiffies, tr->last_relax + XFS_TRIM_RELAX_INTERVAL)) {
376 : /*
377 : * Cycle the rtbitmap lock since we know how to pick up
378 : * where we left off.
379 : */
380 17050 : trace_xfs_discard_rtrelax(mp, rbno, rlen);
381 17050 : xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
382 17050 : xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
383 17050 : tr->last_relax = jiffies;
384 : }
385 :
386 : return 0;
387 : }
388 :
389 : static int
390 85659 : xfs_trim_rtdev_extents(
391 : struct xfs_mount *mp,
392 : xfs_daddr_t start,
393 : xfs_daddr_t end,
394 : xfs_daddr_t minlen,
395 : uint64_t *blocks_trimmed)
396 : {
397 85659 : struct xfs_rtalloc_rec low = { }, high = { };
398 256977 : struct xfs_trim_rtdev tr = {
399 : .blocks_trimmed = blocks_trimmed,
400 85659 : .minlen_fsb = XFS_BB_TO_FSB(mp, minlen),
401 : .last_relax = jiffies,
402 : };
403 85659 : xfs_daddr_t rtdev_daddr;
404 85659 : xfs_extlen_t mod;
405 85659 : int error;
406 :
407 : /* Shift the start and end downwards to match the rt device. */
408 85659 : rtdev_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
409 85659 : if (start > rtdev_daddr)
410 21300 : start -= rtdev_daddr;
411 : else
412 : start = 0;
413 :
414 85659 : if (end <= rtdev_daddr)
415 : return 0;
416 35904 : end -= rtdev_daddr;
417 :
418 35904 : if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1)
419 938 : end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1;
420 :
421 : /* Convert the rt blocks to rt extents */
422 35904 : low.ar_startext = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSB(mp, start), &mod);
423 35904 : if (mod)
424 2549 : low.ar_startext++;
425 35904 : high.ar_startext = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end), &mod);
426 :
427 : /*
428 : * Walk the free ranges between low and high. The query_range function
429 : * trims the extents returned.
430 : */
431 35904 : xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
432 35904 : error = xfs_rtalloc_query_range(mp, NULL, &low, &high,
433 : xfs_trim_rtdev_extent, &tr);
434 35904 : xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
435 35904 : return error;
436 : }
437 : #else
438 : # define xfs_trim_rtdev_extents(m,s,e,n,b) (-EOPNOTSUPP)
439 : #endif /* CONFIG_XFS_RT */
440 :
441 : /*
442 : * trim a range of the filesystem.
443 : *
444 : * Note: the parameters passed from userspace are byte ranges into the
445 : * filesystem which does not match to the format we use for filesystem block
446 : * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
447 : * is a linear address range. Hence we need to use DADDR based conversions and
448 : * comparisons for determining the correct offset and regions to trim.
449 : *
450 : * The realtime device is mapped into the FITRIM "address space" immediately
451 : * after the data device.
452 : */
453 : int
454 205005 : xfs_ioc_trim(
455 : struct xfs_mount *mp,
456 : struct fstrim_range __user *urange)
457 : {
458 205005 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
459 205005 : struct block_device *rt_bdev = NULL;
460 205005 : unsigned int granularity = bdev_discard_granularity(bdev);
461 205005 : struct fstrim_range range;
462 205005 : xfs_rfsblock_t max_blocks;
463 205005 : xfs_daddr_t start, end, minlen;
464 205005 : uint64_t blocks_trimmed = 0;
465 205005 : int error, last_error = 0;
466 :
467 205005 : if (!capable(CAP_SYS_ADMIN))
468 : return -EPERM;
469 205005 : if (!bdev_max_discard_sectors(bdev))
470 : return -EOPNOTSUPP;
471 :
472 205005 : if (mp->m_rtdev_targp) {
473 85708 : rt_bdev = xfs_buftarg_bdev(mp->m_rtdev_targp);
474 85708 : if (!bdev_max_discard_sectors(rt_bdev))
475 : return -EOPNOTSUPP;
476 85708 : granularity = max(granularity,
477 : bdev_discard_granularity(rt_bdev));
478 : }
479 :
480 : /*
481 : * We haven't recovered the log, so we cannot use our bnobt-guided
482 : * storage zapping commands.
483 : */
484 205005 : if (xfs_has_norecovery(mp))
485 : return -EROFS;
486 :
487 204994 : if (copy_from_user(&range, urange, sizeof(range)))
488 : return -EFAULT;
489 :
490 204994 : range.minlen = max_t(u64, granularity, range.minlen);
491 204994 : minlen = BTOBB(range.minlen);
492 : /*
493 : * Truncating down the len isn't actually quite correct, but using
494 : * BBTOB would mean we trivially get overflows for values
495 : * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
496 : * used by the fstrim application. In the end it really doesn't
497 : * matter as trimming blocks is an advisory interface.
498 : */
499 204994 : max_blocks = mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks;
500 204994 : if (range.start >= XFS_FSB_TO_B(mp, max_blocks) ||
501 204939 : range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
502 204939 : range.len < mp->m_sb.sb_blocksize)
503 77 : return -EINVAL;
504 :
505 204917 : start = BTOBB(range.start);
506 204917 : end = start + BTOBBT(range.len) - 1;
507 :
508 204917 : error = xfs_trim_ddev_extents(mp, start, end, minlen, &blocks_trimmed);
509 204917 : if (error == -ERESTARTSYS)
510 : return error;
511 204897 : if (error)
512 0 : last_error = error;
513 :
514 204897 : if (rt_bdev) {
515 85659 : error = xfs_trim_rtdev_extents(mp, start, end, minlen,
516 : &blocks_trimmed);
517 85659 : if (error == -ERESTARTSYS)
518 : return error;
519 85652 : if (error)
520 0 : last_error = error;
521 : }
522 :
523 204890 : if (last_error)
524 : return last_error;
525 :
526 204890 : range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
527 204890 : if (copy_to_user(urange, &range, sizeof(range)))
528 0 : return -EFAULT;
529 : return 0;
530 : }
|