Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2010 Red Hat, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_shared.h"
8 : #include "xfs_format.h"
9 : #include "xfs_log_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_alloc_btree.h"
14 : #include "xfs_alloc.h"
15 : #include "xfs_discard.h"
16 : #include "xfs_error.h"
17 : #include "xfs_extent_busy.h"
18 : #include "xfs_trace.h"
19 : #include "xfs_log.h"
20 : #include "xfs_ag.h"
21 : #include "xfs_health.h"
22 :
23 : /*
24 : * For trim functions that support it, cycle the metadata locks periodically
25 : * to prevent other parts of the filesystem from starving.
 : *
 : * The interval is added to a saved jiffies value and checked with
 : * time_after(), so it is expressed in jiffies.
26 : */
27 : #define XFS_TRIM_RELAX_INTERVAL (HZ)
28 :
29 : /* Trim the free space in this AG by block number. */
30 : static inline int
31 214857 : xfs_trim_ag_bybno(
32 : struct xfs_perag *pag,
33 : struct xfs_buf **agbpp,
34 : xfs_daddr_t start,
35 : xfs_daddr_t end,
36 : xfs_daddr_t minlen,
37 : uint64_t *blocks_trimmed)
38 : {
39 214857 : struct xfs_mount *mp = pag->pag_mount;
40 214857 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
41 214857 : struct xfs_btree_cur *cur;
42 214857 : struct xfs_agf *agf = (*agbpp)->b_addr;
43 214857 : xfs_daddr_t end_daddr;
44 214857 : xfs_agnumber_t agno = pag->pag_agno;
45 214857 : xfs_agblock_t start_agbno;
46 214857 : xfs_agblock_t end_agbno;
47 214857 : xfs_extlen_t minlen_fsb = XFS_BB_TO_FSB(mp, minlen);
48 214857 : unsigned long last_relax = jiffies;
49 214857 : int i;
50 214857 : int error;
51 :
52 214857 : start = max(start, XFS_AGB_TO_DADDR(mp, agno, 0));
53 214857 : start_agbno = xfs_daddr_to_agbno(mp, start);
54 :
55 214857 : end_daddr = XFS_AGB_TO_DADDR(mp, agno, be32_to_cpu(agf->agf_length));
56 214857 : end = min(end, end_daddr - 1);
57 214857 : end_agbno = xfs_daddr_to_agbno(mp, end);
58 :
59 214857 : cur = xfs_allocbt_init_cursor(mp, NULL, *agbpp, pag, XFS_BTNUM_BNO);
60 :
61 214857 : error = xfs_alloc_lookup_le(cur, start_agbno, 0, &i);
62 214857 : if (error)
63 0 : goto out_del_cursor;
64 :
65 : /*
66 : * If we didn't find anything at or below start_agbno, increment the
67 : * cursor to see if there's another record above it.
68 : */
69 214857 : if (!i) {
70 203481 : error = xfs_btree_increment(cur, 0, &i);
71 203481 : if (error)
72 0 : goto out_del_cursor;
73 : }
74 :
75 : /* Loop the entire range that was asked for. */
76 29598927 : while (i) {
77 29486375 : xfs_agblock_t fbno;
78 29486375 : xfs_extlen_t flen;
79 29486375 : xfs_daddr_t dbno;
80 29486375 : xfs_extlen_t dlen;
81 :
82 29486375 : error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
83 29486375 : if (error)
84 25 : goto out_del_cursor;
85 29486375 : if (XFS_IS_CORRUPT(mp, i != 1)) {
86 0 : xfs_btree_mark_sick(cur);
87 0 : error = -EFSCORRUPTED;
88 0 : goto out_del_cursor;
89 : }
90 :
91 : /* Skip extents entirely outside of the range. */
92 29486375 : if (fbno >= end_agbno)
93 : break;
94 29384095 : if (fbno + flen < start_agbno)
95 471 : goto next_extent;
96 :
97 : /* Trim the extent returned to the range we want. */
98 29383624 : if (fbno < start_agbno) {
99 10903 : flen -= start_agbno - fbno;
100 10903 : fbno = start_agbno;
101 : }
102 29383624 : if (fbno + flen > end_agbno + 1)
103 9790 : flen = end_agbno - fbno + 1;
104 :
105 : /* Ignore too small. */
106 29383624 : if (flen < minlen_fsb) {
107 28296043 : trace_xfs_discard_toosmall(mp, agno, fbno, flen);
108 28296043 : goto next_extent;
109 : }
110 :
111 : /*
112 : * If any blocks in the range are still busy, skip the
113 : * discard and try again the next time.
114 : */
115 1087581 : if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
116 247 : trace_xfs_discard_busy(mp, agno, fbno, flen);
117 247 : goto next_extent;
118 : }
119 :
120 1087334 : trace_xfs_discard_extent(mp, agno, fbno, flen);
121 :
122 1087334 : dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
123 1087334 : dlen = XFS_FSB_TO_BB(mp, flen);
124 1087334 : error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
125 1087334 : if (error)
126 0 : goto out_del_cursor;
127 1087334 : *blocks_trimmed += flen;
128 :
129 1087334 : if (time_after(jiffies, last_relax + XFS_TRIM_RELAX_INTERVAL)) {
130 : /*
131 : * Cycle the AGF lock since we know how to pick up
132 : * where we left off.
133 : */
134 1565 : trace_xfs_discard_relax(mp, agno, fbno, flen);
135 1565 : xfs_btree_del_cursor(cur, error);
136 1565 : xfs_buf_relse(*agbpp);
137 :
138 1565 : error = xfs_alloc_read_agf(pag, NULL, 0, agbpp);
139 1565 : if (error)
140 0 : return error;
141 :
142 1565 : cur = xfs_allocbt_init_cursor(mp, NULL, *agbpp, pag,
143 : XFS_BTNUM_BNO);
144 1565 : error = xfs_alloc_lookup_ge(cur, fbno + flen, 0, &i);
145 1565 : last_relax = jiffies;
146 : } else {
147 1085769 : next_extent:
148 29382530 : error = xfs_btree_increment(cur, 0, &i);
149 : }
150 29384095 : if (error)
151 0 : goto out_del_cursor;
152 :
153 29384095 : if (fatal_signal_pending(current)) {
154 25 : error = -ERESTARTSYS;
155 25 : goto out_del_cursor;
156 : }
157 : }
158 :
159 112552 : out_del_cursor:
160 214857 : xfs_btree_del_cursor(cur, error);
161 214857 : return error;
162 : }
163 :
164 : /* Trim the free space in this AG by length. */
165 : static inline int
166 16960 : xfs_trim_ag_bylen(
167 : struct xfs_perag *pag,
168 : struct xfs_buf *agbp,
169 : xfs_daddr_t start,
170 : xfs_daddr_t end,
171 : xfs_daddr_t minlen,
172 : uint64_t *blocks_trimmed)
173 : {
174 16960 : struct xfs_mount *mp = pag->pag_mount;
175 16960 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
176 16960 : struct xfs_btree_cur *cur;
177 16960 : struct xfs_agf *agf = agbp->b_addr;
178 16960 : int error;
179 16960 : int i;
180 :
181 16960 : cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT);
182 :
183 : /*
184 : * Look up the longest btree in the AGF and start with it.
185 : */
186 16960 : error = xfs_alloc_lookup_ge(cur, 0, be32_to_cpu(agf->agf_longest), &i);
187 16959 : if (error)
188 0 : goto out_del_cursor;
189 :
190 : /*
191 : * Loop until we are done with all extents that are large
192 : * enough to be worth discarding.
193 : */
194 111990 : while (i) {
195 106551 : xfs_agblock_t fbno;
196 106551 : xfs_extlen_t flen;
197 106551 : xfs_daddr_t dbno;
198 106551 : xfs_extlen_t dlen;
199 :
200 106551 : error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
201 106552 : if (error)
202 : break;
203 106552 : if (XFS_IS_CORRUPT(mp, i != 1)) {
204 0 : xfs_btree_mark_sick(cur);
205 0 : error = -EFSCORRUPTED;
206 0 : break;
207 : }
208 106552 : ASSERT(flen <= be32_to_cpu(agf->agf_longest));
209 :
210 : /*
211 : * use daddr format for all range/len calculations as that is
212 : * the format the range/len variables are supplied in by
213 : * userspace.
214 : */
215 106552 : dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno);
216 106552 : dlen = XFS_FSB_TO_BB(mp, flen);
217 :
218 : /*
219 : * Too small? Give up.
220 : */
221 106552 : if (dlen < minlen) {
222 11516 : trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno,
223 : flen);
224 11516 : break;
225 : }
226 :
227 : /*
228 : * If any blocks in the range are still busy, skip the
229 : * discard and try again the next time.
230 : */
231 95036 : if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
232 304 : trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
233 304 : goto next_extent;
234 : }
235 :
236 94732 : trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen);
237 94730 : error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
238 94733 : if (error)
239 : break;
240 94733 : *blocks_trimmed += flen;
241 :
242 95037 : next_extent:
243 95037 : error = xfs_btree_decrement(cur, 0, &i);
244 95037 : if (error)
245 : break;
246 :
247 95037 : if (fatal_signal_pending(current)) {
248 : error = -ERESTARTSYS;
249 : break;
250 : }
251 : }
252 :
253 5439 : out_del_cursor:
254 16961 : xfs_btree_del_cursor(cur, error);
255 16961 : return error;
256 : }
257 :
258 : STATIC int
259 231818 : xfs_trim_ag_extents(
260 : struct xfs_perag *pag,
261 : xfs_daddr_t start,
262 : xfs_daddr_t end,
263 : xfs_daddr_t minlen,
264 : uint64_t *blocks_trimmed)
265 : {
266 231818 : struct xfs_mount *mp = pag->pag_mount;
267 231818 : struct xfs_buf *agbp;
268 231818 : struct xfs_agf *agf;
269 231818 : int error;
270 :
271 : /*
272 : * Force out the log. This means any transactions that might have freed
273 : * space before we take the AGF buffer lock are now on disk, and the
274 : * volatile disk cache is flushed.
275 : */
276 231818 : xfs_log_force(mp, XFS_LOG_SYNC);
277 :
278 231818 : error = xfs_alloc_read_agf(pag, NULL, 0, &agbp);
279 231817 : if (error)
280 : return error;
281 231817 : agf = agbp->b_addr;
282 :
283 231817 : if (start > XFS_AGB_TO_DADDR(mp, pag->pag_agno, 0) ||
284 107381 : end < XFS_AGB_TO_DADDR(mp, pag->pag_agno,
285 107381 : be32_to_cpu(agf->agf_length)) - 1) {
286 : /* Only trimming part of this AG */
287 214857 : error = xfs_trim_ag_bybno(pag, &agbp, start, end, minlen,
288 : blocks_trimmed);
289 : } else {
290 : /* Trim this entire AG */
291 16960 : error = xfs_trim_ag_bylen(pag, agbp, start, end, minlen,
292 : blocks_trimmed);
293 : }
294 :
295 231818 : xfs_buf_relse(agbp);
296 231818 : return error;
297 : }
298 :
299 : static int
300 125310 : xfs_trim_ddev_extents(
301 : struct xfs_mount *mp,
302 : xfs_daddr_t start,
303 : xfs_daddr_t end,
304 : xfs_daddr_t minlen,
305 : uint64_t *blocks_trimmed)
306 : {
307 125310 : struct xfs_perag *pag;
308 125310 : xfs_agnumber_t agno;
309 125310 : int error, last_error = 0;
310 :
311 125310 : if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
312 16334 : end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
313 :
314 125310 : agno = xfs_daddr_to_agno(mp, start);
315 357097 : for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) {
316 231818 : error = xfs_trim_ag_extents(pag, start, end, minlen,
317 : blocks_trimmed);
318 231818 : if (error) {
319 31 : last_error = error;
320 31 : if (error == -ERESTARTSYS) {
321 31 : xfs_perag_rele(pag);
322 31 : break;
323 : }
324 : }
325 : }
326 :
327 125310 : return last_error;
328 : }
329 :
330 : /*
331 : * trim a range of the filesystem.
332 : *
333 : * Note: the parameters passed from userspace are byte ranges into the
334 : * filesystem which does not match to the format we use for filesystem block
335 : * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
336 : * is a linear address range. Hence we need to use DADDR based conversions and
337 : * comparisons for determining the correct offset and regions to trim.
338 : */
339 : int
340 125407 : xfs_ioc_trim(
341 : struct xfs_mount *mp,
342 : struct fstrim_range __user *urange)
343 : {
344 125407 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
345 125407 : unsigned int granularity = bdev_discard_granularity(bdev);
346 125407 : struct fstrim_range range;
347 125407 : xfs_daddr_t start, end, minlen;
348 125407 : uint64_t blocks_trimmed = 0;
349 125407 : int error, last_error = 0;
350 :
351 125407 : if (!capable(CAP_SYS_ADMIN))
352 : return -EPERM;
353 125408 : if (!bdev_max_discard_sectors(bdev))
354 : return -EOPNOTSUPP;
355 :
356 : /*
357 : * We haven't recovered the log, so we cannot use our bnobt-guided
358 : * storage zapping commands.
359 : */
360 125408 : if (xfs_has_norecovery(mp))
361 : return -EROFS;
362 :
363 125397 : if (copy_from_user(&range, urange, sizeof(range)))
364 : return -EFAULT;
365 :
366 125397 : range.minlen = max_t(u64, granularity, range.minlen);
367 125397 : minlen = BTOBB(range.minlen);
368 : /*
369 : * Truncating down the len isn't actually quite correct, but using
370 : * BBTOB would mean we trivially get overflows for values
371 : * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
372 : * used by the fstrim application. In the end it really doesn't
373 : * matter as trimming blocks is an advisory interface.
374 : */
375 125397 : if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) ||
376 125332 : range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
377 125332 : range.len < mp->m_sb.sb_blocksize)
378 87 : return -EINVAL;
379 :
380 125310 : start = BTOBB(range.start);
381 125310 : end = start + BTOBBT(range.len) - 1;
382 :
383 125310 : error = xfs_trim_ddev_extents(mp, start, end, minlen, &blocks_trimmed);
384 125310 : if (error == -ERESTARTSYS)
385 : return error;
386 125279 : if (error)
387 : last_error = error;
388 :
389 125279 : if (last_error)
390 : return last_error;
391 :
392 125279 : range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
393 125279 : if (copy_to_user(urange, &range, sizeof(range)))
394 0 : return -EFAULT;
395 : return 0;
396 : }
|