Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2014 Christoph Hellwig.
4 : */
5 : #include "xfs.h"
6 : #include "xfs_shared.h"
7 : #include "xfs_format.h"
8 : #include "xfs_log_format.h"
9 : #include "xfs_trans_resv.h"
10 : #include "xfs_mount.h"
11 : #include "xfs_inode.h"
12 : #include "xfs_trans.h"
13 : #include "xfs_bmap.h"
14 : #include "xfs_iomap.h"
15 : #include "xfs_pnfs.h"
16 :
17 : /*
18 : * Ensure that we do not have any outstanding pNFS layouts that can be used by
19 : * clients to directly read from or write to this inode. This must be called
20 : * before every operation that can remove blocks from the extent map.
21 : * Additionally we call it during the write operation, where aren't concerned
22 : * about exposing unallocated blocks but just want to provide basic
23 : * synchronization between a local writer and pNFS clients. mmap writes would
24 : * also benefit from this sort of synchronization, but due to the tricky locking
25 : * rules in the page fault path we don't bother.
26 : */
27 : int
28 95815438 : xfs_break_leased_layouts(
29 : struct inode *inode,
30 : uint *iolock,
31 : bool *did_unlock)
32 : {
33 95815438 : struct xfs_inode *ip = XFS_I(inode);
34 95815438 : int error;
35 :
36 95815438 : while ((error = break_layout(inode, false)) == -EWOULDBLOCK) {
37 0 : xfs_iunlock(ip, *iolock);
38 0 : *did_unlock = true;
39 0 : error = break_layout(inode, true);
40 0 : *iolock &= ~XFS_IOLOCK_SHARED;
41 0 : *iolock |= XFS_IOLOCK_EXCL;
42 0 : xfs_ilock(ip, *iolock);
43 : }
44 :
45 95845096 : return error;
46 : }
47 :
48 : /*
49 : * Get a unique ID including its location so that the client can identify
50 : * the exported device.
51 : */
52 : int
53 0 : xfs_fs_get_uuid(
54 : struct super_block *sb,
55 : u8 *buf,
56 : u32 *len,
57 : u64 *offset)
58 : {
59 0 : struct xfs_mount *mp = XFS_M(sb);
60 :
61 0 : xfs_notice_once(mp,
62 : "Using experimental pNFS feature, use at your own risk!");
63 :
64 0 : if (*len < sizeof(uuid_t))
65 : return -EINVAL;
66 :
67 0 : memcpy(buf, &mp->m_sb.sb_uuid, sizeof(uuid_t));
68 0 : *len = sizeof(uuid_t);
69 0 : *offset = offsetof(struct xfs_dsb, sb_uuid);
70 0 : return 0;
71 : }
72 :
73 : /*
74 : * We cannot use file based VFS helpers such as file_modified() to update
75 : * inode state as we modify the data/metadata in the inode here. Hence we have
76 : * to open code the timestamp updates and SUID/SGID stripping. We also need
77 : * to set the inode prealloc flag to ensure that the extents we allocate are not
78 : * removed if the inode is reclaimed from memory before xfs_fs_block_commit()
79 : * is from the client to indicate that data has been written and the file size
80 : * can be extended.
81 : */
82 : static int
83 0 : xfs_fs_map_update_inode(
84 : struct xfs_inode *ip)
85 : {
86 0 : struct xfs_trans *tp;
87 0 : int error;
88 :
89 0 : error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
90 : 0, 0, 0, &tp);
91 0 : if (error)
92 : return error;
93 :
94 0 : xfs_ilock(ip, XFS_ILOCK_EXCL);
95 0 : xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
96 :
97 0 : VFS_I(ip)->i_mode &= ~S_ISUID;
98 0 : if (VFS_I(ip)->i_mode & S_IXGRP)
99 0 : VFS_I(ip)->i_mode &= ~S_ISGID;
100 0 : xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
101 0 : ip->i_diflags |= XFS_DIFLAG_PREALLOC;
102 :
103 0 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
104 0 : return xfs_trans_commit(tp);
105 : }
106 :
107 : /*
108 : * Get a layout for the pNFS client.
109 : */
110 : int
111 0 : xfs_fs_map_blocks(
112 : struct inode *inode,
113 : loff_t offset,
114 : u64 length,
115 : struct iomap *iomap,
116 : bool write,
117 : u32 *device_generation)
118 : {
119 0 : struct xfs_inode *ip = XFS_I(inode);
120 0 : struct xfs_mount *mp = ip->i_mount;
121 0 : struct xfs_bmbt_irec imap;
122 0 : xfs_fileoff_t offset_fsb, end_fsb;
123 0 : loff_t limit;
124 0 : int bmapi_flags = XFS_BMAPI_ENTIRE;
125 0 : int nimaps = 1;
126 0 : uint lock_flags;
127 0 : int error = 0;
128 0 : u64 seq;
129 :
130 0 : if (xfs_is_shutdown(mp))
131 : return -EIO;
132 :
133 : /*
134 : * We can't export inodes residing on the realtime device. The realtime
135 : * device doesn't have a UUID to identify it, so the client has no way
136 : * to find it.
137 : */
138 0 : if (XFS_IS_REALTIME_INODE(ip))
139 : return -ENXIO;
140 :
141 : /*
142 : * The pNFS block layout spec actually supports reflink like
143 : * functionality, but the Linux pNFS server doesn't implement it yet.
144 : */
145 0 : if (xfs_is_reflink_inode(ip))
146 : return -ENXIO;
147 :
148 : /*
149 : * Lock out any other I/O before we flush and invalidate the pagecache,
150 : * and then hand out a layout to the remote system. This is very
151 : * similar to direct I/O, except that the synchronization is much more
152 : * complicated. See the comment near xfs_break_leased_layouts
153 : * for a detailed explanation.
154 : */
155 0 : xfs_ilock(ip, XFS_IOLOCK_EXCL);
156 :
157 0 : error = -EINVAL;
158 0 : limit = mp->m_super->s_maxbytes;
159 0 : if (!write)
160 0 : limit = max(limit, round_up(i_size_read(inode),
161 : inode->i_sb->s_blocksize));
162 0 : if (offset > limit)
163 0 : goto out_unlock;
164 0 : if (offset > limit - length)
165 0 : length = limit - offset;
166 :
167 0 : error = filemap_write_and_wait(inode->i_mapping);
168 0 : if (error)
169 0 : goto out_unlock;
170 0 : error = invalidate_inode_pages2(inode->i_mapping);
171 0 : if (WARN_ON_ONCE(error))
172 0 : goto out_unlock;
173 :
174 0 : end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
175 0 : offset_fsb = XFS_B_TO_FSBT(mp, offset);
176 :
177 0 : lock_flags = xfs_ilock_data_map_shared(ip);
178 0 : error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
179 : &imap, &nimaps, bmapi_flags);
180 0 : seq = xfs_iomap_inode_sequence(ip, 0);
181 :
182 0 : ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK);
183 :
184 0 : if (!error && write &&
185 0 : (!nimaps || imap.br_startblock == HOLESTARTBLOCK)) {
186 0 : if (offset + length > XFS_ISIZE(ip))
187 0 : end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb);
188 0 : else if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
189 0 : end_fsb = min(end_fsb, imap.br_startoff +
190 : imap.br_blockcount);
191 0 : xfs_iunlock(ip, lock_flags);
192 :
193 0 : error = xfs_iomap_write_direct(ip, offset_fsb,
194 : end_fsb - offset_fsb, 0, &imap, &seq);
195 0 : if (error)
196 0 : goto out_unlock;
197 :
198 : /*
199 : * Ensure the next transaction is committed synchronously so
200 : * that the blocks allocated and handed out to the client are
201 : * guaranteed to be present even after a server crash.
202 : */
203 0 : error = xfs_fs_map_update_inode(ip);
204 0 : if (!error)
205 0 : error = xfs_log_force_inode(ip);
206 0 : if (error)
207 0 : goto out_unlock;
208 :
209 : } else {
210 0 : xfs_iunlock(ip, lock_flags);
211 : }
212 0 : xfs_iunlock(ip, XFS_IOLOCK_EXCL);
213 :
214 0 : error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq);
215 0 : *device_generation = mp->m_generation;
216 0 : return error;
217 0 : out_unlock:
218 0 : xfs_iunlock(ip, XFS_IOLOCK_EXCL);
219 0 : return error;
220 : }
221 :
222 : /*
223 : * Ensure the size update falls into a valid allocated block.
224 : */
225 : static int
226 0 : xfs_pnfs_validate_isize(
227 : struct xfs_inode *ip,
228 : xfs_off_t isize)
229 : {
230 0 : struct xfs_bmbt_irec imap;
231 0 : int nimaps = 1;
232 0 : int error = 0;
233 :
234 0 : xfs_ilock(ip, XFS_ILOCK_SHARED);
235 0 : error = xfs_bmapi_read(ip, XFS_B_TO_FSBT(ip->i_mount, isize - 1), 1,
236 : &imap, &nimaps, 0);
237 0 : xfs_iunlock(ip, XFS_ILOCK_SHARED);
238 0 : if (error)
239 : return error;
240 :
241 0 : if (imap.br_startblock == HOLESTARTBLOCK ||
242 0 : imap.br_startblock == DELAYSTARTBLOCK ||
243 0 : imap.br_state == XFS_EXT_UNWRITTEN)
244 0 : return -EIO;
245 : return 0;
246 : }
247 :
248 : /*
249 : * Make sure the blocks described by maps are stable on disk. This includes
250 : * converting any unwritten extents, flushing the disk cache and updating the
251 : * time stamps.
252 : *
253 : * Note that we rely on the caller to always send us a timestamp update so that
254 : * we always commit a transaction here. If that stops being true we will have
255 : * to manually flush the cache here similar to what the fsync code path does
256 : * for datasyncs on files that have no dirty metadata.
257 : */
258 : int
259 0 : xfs_fs_commit_blocks(
260 : struct inode *inode,
261 : struct iomap *maps,
262 : int nr_maps,
263 : struct iattr *iattr)
264 : {
265 0 : struct xfs_inode *ip = XFS_I(inode);
266 0 : struct xfs_mount *mp = ip->i_mount;
267 0 : struct xfs_trans *tp;
268 0 : bool update_isize = false;
269 0 : int error, i;
270 0 : loff_t size;
271 :
272 0 : ASSERT(iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME));
273 :
274 0 : xfs_ilock(ip, XFS_IOLOCK_EXCL);
275 :
276 0 : size = i_size_read(inode);
277 0 : if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size > size) {
278 0 : update_isize = true;
279 0 : size = iattr->ia_size;
280 : }
281 :
282 0 : for (i = 0; i < nr_maps; i++) {
283 0 : u64 start, length, end;
284 :
285 0 : start = maps[i].offset;
286 0 : if (start > size)
287 0 : continue;
288 :
289 0 : end = start + maps[i].length;
290 0 : if (end > size)
291 : end = size;
292 :
293 0 : length = end - start;
294 0 : if (!length)
295 0 : continue;
296 :
297 : /*
298 : * Make sure reads through the pagecache see the new data.
299 : */
300 0 : error = invalidate_inode_pages2_range(inode->i_mapping,
301 0 : start >> PAGE_SHIFT,
302 0 : (end - 1) >> PAGE_SHIFT);
303 0 : WARN_ON_ONCE(error);
304 :
305 0 : error = xfs_iomap_write_unwritten(ip, start, length, false);
306 0 : if (error)
307 0 : goto out_drop_iolock;
308 : }
309 :
310 0 : if (update_isize) {
311 0 : error = xfs_pnfs_validate_isize(ip, size);
312 0 : if (error)
313 0 : goto out_drop_iolock;
314 : }
315 :
316 0 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
317 0 : if (error)
318 0 : goto out_drop_iolock;
319 :
320 0 : xfs_ilock(ip, XFS_ILOCK_EXCL);
321 0 : xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
322 0 : xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
323 :
324 0 : ASSERT(!(iattr->ia_valid & (ATTR_UID | ATTR_GID)));
325 0 : setattr_copy(&nop_mnt_idmap, inode, iattr);
326 0 : if (update_isize) {
327 0 : i_size_write(inode, iattr->ia_size);
328 0 : ip->i_disk_size = iattr->ia_size;
329 : }
330 :
331 0 : xfs_trans_set_sync(tp);
332 0 : error = xfs_trans_commit(tp);
333 :
334 0 : out_drop_iolock:
335 0 : xfs_iunlock(ip, XFS_IOLOCK_EXCL);
336 0 : return error;
337 : }
|