Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : #include <linux/fs.h>
3 : #include <linux/random.h>
4 : #include <linux/buffer_head.h>
5 : #include <linux/utsname.h>
6 : #include <linux/kthread.h>
7 :
8 : #include "ext4.h"
9 :
10 : /* Checksumming functions */
11 16 : static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
12 : {
13 16 : struct ext4_sb_info *sbi = EXT4_SB(sb);
14 16 : int offset = offsetof(struct mmp_struct, mmp_checksum);
15 16 : __u32 csum;
16 :
17 16 : csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
18 :
19 16 : return cpu_to_le32(csum);
20 : }
21 :
22 7 : static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
23 : {
24 7 : if (!ext4_has_metadata_csum(sb))
25 : return 1;
26 :
27 7 : return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
28 : }
29 :
30 9 : static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
31 : {
32 9 : if (!ext4_has_metadata_csum(sb))
33 : return;
34 :
35 9 : mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
36 : }
37 :
38 : /*
39 : * Write the MMP block using REQ_SYNC to try to get the block on-disk
40 : * faster.
41 : */
42 9 : static int write_mmp_block_thawed(struct super_block *sb,
43 : struct buffer_head *bh)
44 : {
45 9 : struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
46 :
47 9 : ext4_mmp_csum_set(sb, mmp);
48 9 : lock_buffer(bh);
49 9 : bh->b_end_io = end_buffer_write_sync;
50 9 : get_bh(bh);
51 9 : submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh);
52 9 : wait_on_buffer(bh);
53 18 : if (unlikely(!buffer_uptodate(bh)))
54 0 : return -EIO;
55 : return 0;
56 : }
57 :
/*
 * Write the MMP block with freeze protection held.
 *
 * The write is bracketed by sb_start_write()/sb_end_write() so that no
 * dirty buffer is created on a frozen filesystem.  Returns whatever
 * write_mmp_block_thawed() returns (0 or -EIO).
 */
static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
	int ret;

	sb_start_write(sb);
	ret = write_mmp_block_thawed(sb, bh);
	sb_end_write(sb);

	return ret;
}
71 :
72 : /*
73 : * Read the MMP block. It _must_ be read from disk and hence we clear the
74 : * uptodate flag on the buffer.
75 : */
76 7 : static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
77 : ext4_fsblk_t mmp_block)
78 : {
79 7 : struct mmp_struct *mmp;
80 7 : int ret;
81 :
82 7 : if (*bh)
83 3 : clear_buffer_uptodate(*bh);
84 :
85 : /* This would be sb_bread(sb, mmp_block), except we need to be sure
86 : * that the MD RAID device cache has been bypassed, and that the read
87 : * is not blocked in the elevator. */
88 7 : if (!*bh) {
89 4 : *bh = sb_getblk(sb, mmp_block);
90 4 : if (!*bh) {
91 0 : ret = -ENOMEM;
92 0 : goto warn_exit;
93 : }
94 : }
95 :
96 7 : lock_buffer(*bh);
97 7 : ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL);
98 7 : if (ret)
99 0 : goto warn_exit;
100 :
101 7 : mmp = (struct mmp_struct *)((*bh)->b_data);
102 7 : if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
103 0 : ret = -EFSCORRUPTED;
104 0 : goto warn_exit;
105 : }
106 7 : if (!ext4_mmp_csum_verify(sb, mmp)) {
107 1 : ret = -EFSBADCRC;
108 1 : goto warn_exit;
109 : }
110 : return 0;
111 1 : warn_exit:
112 1 : brelse(*bh);
113 1 : *bh = NULL;
114 1 : ext4_warning(sb, "Error %d while reading MMP block %llu",
115 : ret, mmp_block);
116 1 : return ret;
117 : }
118 :
119 : /*
120 : * Dump as much information as possible to help the admin.
121 : */
122 0 : void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
123 : const char *function, unsigned int line, const char *msg)
124 : {
125 0 : __ext4_warning(sb, function, line, "%s", msg);
126 0 : __ext4_warning(sb, function, line,
127 : "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
128 0 : (unsigned long long)le64_to_cpu(mmp->mmp_time),
129 0 : (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
130 0 : (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
131 0 : }
132 :
133 : /*
134 : * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
135 : */
136 3 : static int kmmpd(void *data)
137 : {
138 3 : struct super_block *sb = data;
139 3 : struct ext4_super_block *es = EXT4_SB(sb)->s_es;
140 3 : struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh;
141 3 : struct mmp_struct *mmp;
142 3 : ext4_fsblk_t mmp_block;
143 3 : u32 seq = 0;
144 3 : unsigned long failed_writes = 0;
145 3 : int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
146 3 : unsigned mmp_check_interval;
147 3 : unsigned long last_update_time;
148 3 : unsigned long diff;
149 3 : int retval = 0;
150 :
151 3 : mmp_block = le64_to_cpu(es->s_mmp_block);
152 3 : mmp = (struct mmp_struct *)(bh->b_data);
153 3 : mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
154 : /*
155 : * Start with the higher mmp_check_interval and reduce it if
156 : * the MMP block is being updated on time.
157 : */
158 3 : mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
159 : EXT4_MMP_MIN_CHECK_INTERVAL);
160 3 : mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
161 :
162 6 : memcpy(mmp->mmp_nodename, init_utsname()->nodename,
163 : sizeof(mmp->mmp_nodename));
164 :
165 6 : while (!kthread_should_stop() && !sb_rdonly(sb)) {
166 3 : if (!ext4_has_feature_mmp(sb)) {
167 0 : ext4_warning(sb, "kmmpd being stopped since MMP feature"
168 : " has been disabled.");
169 0 : goto wait_to_exit;
170 : }
171 3 : if (++seq > EXT4_MMP_SEQ_MAX)
172 0 : seq = 1;
173 :
174 3 : mmp->mmp_seq = cpu_to_le32(seq);
175 3 : mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
176 3 : last_update_time = jiffies;
177 :
178 3 : retval = write_mmp_block(sb, bh);
179 : /*
180 : * Don't spew too many error messages. Print one every
181 : * (s_mmp_update_interval * 60) seconds.
182 : */
183 3 : if (retval) {
184 0 : if ((failed_writes % 60) == 0) {
185 0 : ext4_error_err(sb, -retval,
186 : "Error writing to MMP block");
187 : }
188 0 : failed_writes++;
189 : }
190 :
191 3 : diff = jiffies - last_update_time;
192 3 : if (diff < mmp_update_interval * HZ)
193 3 : schedule_timeout_interruptible(mmp_update_interval *
194 3 : HZ - diff);
195 :
196 : /*
197 : * We need to make sure that more than mmp_check_interval
198 : * seconds have not passed since writing. If that has happened
199 : * we need to check if the MMP block is as we left it.
200 : */
201 3 : diff = jiffies - last_update_time;
202 3 : if (diff > mmp_check_interval * HZ) {
203 0 : struct buffer_head *bh_check = NULL;
204 0 : struct mmp_struct *mmp_check;
205 :
206 0 : retval = read_mmp_block(sb, &bh_check, mmp_block);
207 0 : if (retval) {
208 0 : ext4_error_err(sb, -retval,
209 : "error reading MMP data: %d",
210 : retval);
211 0 : goto wait_to_exit;
212 : }
213 :
214 0 : mmp_check = (struct mmp_struct *)(bh_check->b_data);
215 0 : if (mmp->mmp_seq != mmp_check->mmp_seq ||
216 0 : memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
217 : sizeof(mmp->mmp_nodename))) {
218 0 : dump_mmp_msg(sb, mmp_check,
219 : "Error while updating MMP info. "
220 : "The filesystem seems to have been"
221 : " multiply mounted.");
222 0 : ext4_error_err(sb, EBUSY, "abort");
223 0 : put_bh(bh_check);
224 0 : retval = -EBUSY;
225 0 : goto wait_to_exit;
226 : }
227 0 : put_bh(bh_check);
228 : }
229 :
230 : /*
231 : * Adjust the mmp_check_interval depending on how much time
232 : * it took for the MMP block to be written.
233 : */
234 3 : mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
235 : EXT4_MMP_MAX_CHECK_INTERVAL),
236 : EXT4_MMP_MIN_CHECK_INTERVAL);
237 3 : mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
238 : }
239 :
240 : /*
241 : * Unmount seems to be clean.
242 : */
243 3 : mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
244 3 : mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
245 :
246 3 : retval = write_mmp_block(sb, bh);
247 :
248 3 : wait_to_exit:
249 3 : while (!kthread_should_stop()) {
250 0 : set_current_state(TASK_INTERRUPTIBLE);
251 0 : if (!kthread_should_stop())
252 0 : schedule();
253 : }
254 3 : set_current_state(TASK_RUNNING);
255 3 : return retval;
256 : }
257 :
258 3173 : void ext4_stop_mmpd(struct ext4_sb_info *sbi)
259 : {
260 3173 : if (sbi->s_mmp_tsk) {
261 3 : kthread_stop(sbi->s_mmp_tsk);
262 3 : brelse(sbi->s_mmp_bh);
263 3 : sbi->s_mmp_tsk = NULL;
264 : }
265 3173 : }
266 :
267 : /*
268 : * Get a random new sequence number but make sure it is not greater than
269 : * EXT4_MMP_SEQ_MAX.
270 : */
271 : static unsigned int mmp_new_seq(void)
272 : {
273 3 : return get_random_u32_below(EXT4_MMP_SEQ_MAX + 1);
274 : }
275 :
276 : /*
277 : * Protect the filesystem from being mounted more than once.
278 : */
279 4 : int ext4_multi_mount_protect(struct super_block *sb,
280 : ext4_fsblk_t mmp_block)
281 : {
282 4 : struct ext4_super_block *es = EXT4_SB(sb)->s_es;
283 4 : struct buffer_head *bh = NULL;
284 4 : struct mmp_struct *mmp = NULL;
285 4 : u32 seq;
286 4 : unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
287 4 : unsigned int wait_time = 0;
288 4 : int retval;
289 :
290 8 : if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
291 : mmp_block >= ext4_blocks_count(es)) {
292 0 : ext4_warning(sb, "Invalid MMP block in superblock");
293 0 : retval = -EINVAL;
294 0 : goto failed;
295 : }
296 :
297 4 : retval = read_mmp_block(sb, &bh, mmp_block);
298 4 : if (retval)
299 1 : goto failed;
300 :
301 3 : mmp = (struct mmp_struct *)(bh->b_data);
302 :
303 3 : if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
304 : mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
305 :
306 : /*
307 : * If check_interval in MMP block is larger, use that instead of
308 : * update_interval from the superblock.
309 : */
310 3 : if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
311 : mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
312 :
313 3 : seq = le32_to_cpu(mmp->mmp_seq);
314 3 : if (seq == EXT4_MMP_SEQ_CLEAN)
315 3 : goto skip;
316 :
317 0 : if (seq == EXT4_MMP_SEQ_FSCK) {
318 0 : dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
319 0 : retval = -EBUSY;
320 0 : goto failed;
321 : }
322 :
323 0 : wait_time = min(mmp_check_interval * 2 + 1,
324 : mmp_check_interval + 60);
325 :
326 : /* Print MMP interval if more than 20 secs. */
327 0 : if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
328 0 : ext4_warning(sb, "MMP interval %u higher than expected, please"
329 : " wait.\n", wait_time * 2);
330 :
331 0 : if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
332 0 : ext4_warning(sb, "MMP startup interrupted, failing mount\n");
333 0 : retval = -ETIMEDOUT;
334 0 : goto failed;
335 : }
336 :
337 0 : retval = read_mmp_block(sb, &bh, mmp_block);
338 0 : if (retval)
339 0 : goto failed;
340 0 : mmp = (struct mmp_struct *)(bh->b_data);
341 0 : if (seq != le32_to_cpu(mmp->mmp_seq)) {
342 0 : dump_mmp_msg(sb, mmp,
343 : "Device is already active on another node.");
344 0 : retval = -EBUSY;
345 0 : goto failed;
346 : }
347 :
348 0 : skip:
349 : /*
350 : * write a new random sequence number.
351 : */
352 3 : seq = mmp_new_seq();
353 3 : mmp->mmp_seq = cpu_to_le32(seq);
354 :
355 : /*
356 : * On mount / remount we are protected against fs freezing (by s_umount
357 : * semaphore) and grabbing freeze protection upsets lockdep
358 : */
359 3 : retval = write_mmp_block_thawed(sb, bh);
360 3 : if (retval)
361 0 : goto failed;
362 :
363 : /*
364 : * wait for MMP interval and check mmp_seq.
365 : */
366 3 : if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
367 0 : ext4_warning(sb, "MMP startup interrupted, failing mount");
368 0 : retval = -ETIMEDOUT;
369 0 : goto failed;
370 : }
371 :
372 3 : retval = read_mmp_block(sb, &bh, mmp_block);
373 3 : if (retval)
374 0 : goto failed;
375 3 : mmp = (struct mmp_struct *)(bh->b_data);
376 3 : if (seq != le32_to_cpu(mmp->mmp_seq)) {
377 0 : dump_mmp_msg(sb, mmp,
378 : "Device is already active on another node.");
379 0 : retval = -EBUSY;
380 0 : goto failed;
381 : }
382 :
383 3 : EXT4_SB(sb)->s_mmp_bh = bh;
384 :
385 3 : BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
386 3 : snprintf(mmp->mmp_bdevname, sizeof(mmp->mmp_bdevname),
387 : "%pg", bh->b_bdev);
388 :
389 : /*
390 : * Start a kernel thread to update the MMP block periodically.
391 : */
392 3 : EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s",
393 : (int)sizeof(mmp->mmp_bdevname),
394 : mmp->mmp_bdevname);
395 3 : if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
396 0 : EXT4_SB(sb)->s_mmp_tsk = NULL;
397 0 : ext4_warning(sb, "Unable to create kmmpd thread for %s.",
398 : sb->s_id);
399 0 : retval = -ENOMEM;
400 0 : goto failed;
401 : }
402 :
403 : return 0;
404 :
405 1 : failed:
406 1 : brelse(bh);
407 : return retval;
408 : }
|