Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2008 Oracle. All rights reserved.
4 : */
5 :
6 : #include <linux/sched.h>
7 : #include <linux/pagemap.h>
8 : #include <linux/spinlock.h>
9 : #include <linux/page-flags.h>
10 : #include <asm/bug.h>
11 : #include "misc.h"
12 : #include "ctree.h"
13 : #include "extent_io.h"
14 : #include "locking.h"
15 : #include "accessors.h"
16 :
17 : /*
18 : * Lockdep class keys for extent_buffer locks in this root. For a given
19 : * eb, the lockdep key is determined by the btrfs_root it belongs to and
20 : * the level the eb occupies in the tree.
21 : *
22 : * Different roots are used for different purposes and may nest inside each
23 : * other, so they require separate keysets. As lockdep keys should be
24 : * static, assign keysets according to the purpose of the root as indicated
25 : * by btrfs_root->root_key.objectid. This ensures that all special purpose
26 : * roots have separate keysets.
27 : *
28 : * Lock-nesting across peer nodes is always done with the immediate parent
29 : * node locked, thus preventing deadlock. As lockdep doesn't know this, use
30 : * a subclass to avoid triggering a lockdep warning in such cases.
31 : *
32 : * The key is set by the readpage_end_io_hook after the buffer has passed
33 : * csum validation but before the pages are unlocked. It is also set by
34 : * btrfs_init_new_buffer on freshly allocated blocks.
35 : *
36 : * We also add a check to make sure the highest level of the tree is the
37 : * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
38 : * needs to be updated as well.
39 : */
40 : #ifdef CONFIG_DEBUG_LOCK_ALLOC
41 : #if BTRFS_MAX_LEVEL != 8
42 : #error "BTRFS_MAX_LEVEL changed, update the lockdep keyset definitions below"
43 : #endif
44 :
45 : #define DEFINE_LEVEL(stem, level) \
46 : .names[level] = "btrfs-" stem "-0" #level,
47 :
48 : #define DEFINE_NAME(stem) \
49 : DEFINE_LEVEL(stem, 0) \
50 : DEFINE_LEVEL(stem, 1) \
51 : DEFINE_LEVEL(stem, 2) \
52 : DEFINE_LEVEL(stem, 3) \
53 : DEFINE_LEVEL(stem, 4) \
54 : DEFINE_LEVEL(stem, 5) \
55 : DEFINE_LEVEL(stem, 6) \
56 : DEFINE_LEVEL(stem, 7)
57 :
58 : static struct btrfs_lockdep_keyset {
59 : u64 id; /* root objectid */
60 : /* Longest entry: btrfs-block-group-00 */
61 : char names[BTRFS_MAX_LEVEL][24];
62 : struct lock_class_key keys[BTRFS_MAX_LEVEL];
63 : } btrfs_lockdep_keysets[] = {
64 : { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
65 : { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
66 : { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
67 : { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
68 : { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
69 : { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
70 : { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
71 : { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
72 : { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
73 : { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
74 : { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
75 : { .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
76 : { .id = 0, DEFINE_NAME("tree") },
77 : };
78 :
79 : #undef DEFINE_LEVEL
80 : #undef DEFINE_NAME
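For reference, a sketch of what one entry of btrfs_lockdep_keysets above expands to after preprocessing; this is illustrative only and not part of the file:

	{ .id = BTRFS_ROOT_TREE_OBJECTID,
	  .names[0] = "btrfs-root-00",
	  .names[1] = "btrfs-root-01",
	  /* ... levels 2-6 follow the same pattern ... */
	  .names[7] = "btrfs-root-07" },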
81 :
82 : void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
83 : {
84 : struct btrfs_lockdep_keyset *ks;
85 :
86 : BUG_ON(level >= ARRAY_SIZE(ks->keys));
87 :
88 : /* Find the matching keyset, id 0 is the default entry */
89 : for (ks = btrfs_lockdep_keysets; ks->id; ks++)
90 : if (ks->id == objectid)
91 : break;
92 :
93 : lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
94 : }
95 :
96 : void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
97 : {
98 : if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
99 : btrfs_set_buffer_lockdep_class(root->root_key.objectid,
100 : eb, btrfs_header_level(eb));
101 : }
102 :
103 : #endif
104 :
105 : /*
106 : * Extent buffer locking
107 : * =====================
108 : *
109 : * We use a rw_semaphore for tree locking, and the semantics are exactly the
110 : * same:
111 : *
112 : * - reader/writer exclusion
113 : * - writer/writer exclusion
114 : * - reader/reader sharing
115 : * - try-lock semantics for readers and writers
116 : *
117 : * The rwsem implementation does opportunistic spinning, which reduces the
118 : * number of times the locking task needs to sleep.
119 : */
120 :
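As a concrete illustration of the rwsem semantics described above, a minimal caller-side sketch using the helpers defined below (assuming a valid struct btrfs_root *root; not part of this file):

	struct extent_buffer *eb;

	eb = btrfs_lock_root_node(root);	/* returns with eb write locked */
	/* ... modify the tree block ... */
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);
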
121 : /*
122 : * __btrfs_tree_read_lock - lock extent buffer for read
123 : * @eb: the eb to be locked
124 : * @nest: the nesting level to be used for lockdep
125 : *
126 : * This takes the read lock on the extent buffer, using the specified nesting
127 : * level for lockdep purposes.
128 : */
129 0 : void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
130 : {
131 0 : u64 start_ns = 0;
132 :
133 0 : if (trace_btrfs_tree_read_lock_enabled())
134 0 : start_ns = ktime_get_ns();
135 :
136 0 : down_read_nested(&eb->lock, nest);
137 0 : trace_btrfs_tree_read_lock(eb, start_ns);
138 0 : }
139 :
140 0 : void btrfs_tree_read_lock(struct extent_buffer *eb)
141 : {
142 0 : __btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
143 0 : }
144 :
145 : /*
146 : * Try-lock for read.
147 : *
148 : * Return 1 if the lock has been taken, 0 otherwise
149 : */
150 0 : int btrfs_try_tree_read_lock(struct extent_buffer *eb)
151 : {
152 0 : if (down_read_trylock(&eb->lock)) {
153 0 : trace_btrfs_try_tree_read_lock(eb);
154 0 : return 1;
155 : }
156 : return 0;
157 : }
158 :
159 : /*
160 : * Try-lock for write.
161 : *
162 : * Return 1 if the lock has been taken, 0 otherwise
163 : */
164 0 : int btrfs_try_tree_write_lock(struct extent_buffer *eb)
165 : {
166 0 : if (down_write_trylock(&eb->lock)) {
167 0 : eb->lock_owner = current->pid;
168 0 : trace_btrfs_try_tree_write_lock(eb);
169 0 : return 1;
170 : }
171 : return 0;
172 : }
173 :
174 : /*
175 : * Release read lock.
176 : */
177 0 : void btrfs_tree_read_unlock(struct extent_buffer *eb)
178 : {
179 0 : trace_btrfs_tree_read_unlock(eb);
180 0 : up_read(&eb->lock);
181 0 : }
182 :
183 : /*
184 : * __btrfs_tree_lock - lock eb for write
185 : * @eb: the eb to lock
186 : * @nest: the nesting to use for the lock
187 : *
188 : * Returns with the eb->lock write locked.
189 : */
190 0 : void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
191 : __acquires(&eb->lock)
192 : {
193 0 : u64 start_ns = 0;
194 :
195 0 : if (trace_btrfs_tree_lock_enabled())
196 0 : start_ns = ktime_get_ns();
197 :
198 0 : down_write_nested(&eb->lock, nest);
199 0 : eb->lock_owner = current->pid;
200 0 : trace_btrfs_tree_lock(eb, start_ns);
201 0 : }
202 :
203 0 : void btrfs_tree_lock(struct extent_buffer *eb)
204 : {
205 0 : __btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
206 0 : }
207 :
208 : /*
209 : * Release the write lock.
210 : */
211 0 : void btrfs_tree_unlock(struct extent_buffer *eb)
212 : {
213 0 : trace_btrfs_tree_unlock(eb);
214 0 : eb->lock_owner = 0;
215 0 : up_write(&eb->lock);
216 0 : }
217 :
218 : /*
219 : * This releases any locks held in the path starting at level and going all the
220 : * way up to the root.
221 : *
222 : * btrfs_search_slot will keep the lock held on higher nodes in a few corner
223 : * cases, such as COW of the block at slot zero in the node. This ignores
224 : * those rules, and it should only be called when there are no more updates to
225 : * be done higher up in the tree.
226 : */
227 0 : void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
228 : {
229 0 : int i;
230 :
231 0 : if (path->keep_locks)
232 : return;
233 :
234 0 : for (i = level; i < BTRFS_MAX_LEVEL; i++) {
235 0 : if (!path->nodes[i])
236 0 : continue;
237 0 : if (!path->locks[i])
238 0 : continue;
239 0 : btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
240 0 : path->locks[i] = 0;
241 : }
242 : }
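A hedged usage sketch of the helper above: once a caller has finished its modifications at the leaf, it can release every lock above it while keeping the leaf locked (assumes a path populated by btrfs_search_slot()):

	/* Drop locks from level 1 up to the root, keep the leaf locked. */
	btrfs_unlock_up_safe(path, 1);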
243 :
244 : /*
245 : * Loop around taking references on and locking the root node of the tree until
246 : * we end up with a lock on the root node.
247 : *
248 : * Return: root extent buffer with write lock held
249 : */
250 0 : struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
251 : {
252 0 : struct extent_buffer *eb;
253 :
254 0 : while (1) {
255 0 : eb = btrfs_root_node(root);
256 :
257 0 : btrfs_maybe_reset_lockdep_class(root, eb);
258 0 : btrfs_tree_lock(eb);
259 0 : if (eb == root->node)
260 : break;
261 0 : btrfs_tree_unlock(eb);
262 0 : free_extent_buffer(eb);
263 : }
264 0 : return eb;
265 : }
266 :
267 : /*
268 : * Loop around taking references on and locking the root node of the tree until
269 : * we end up with a lock on the root node.
270 : *
271 : * Return: root extent buffer with read lock held
272 : */
273 0 : struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
274 : {
275 0 : struct extent_buffer *eb;
276 :
277 0 : while (1) {
278 0 : eb = btrfs_root_node(root);
279 :
280 0 : btrfs_maybe_reset_lockdep_class(root, eb);
281 0 : btrfs_tree_read_lock(eb);
282 0 : if (eb == root->node)
283 : break;
284 0 : btrfs_tree_read_unlock(eb);
285 0 : free_extent_buffer(eb);
286 : }
287 0 : return eb;
288 : }
289 :
290 : /*
291 : * Loop around taking references on and locking the root node of the tree in
292 : * nowait mode until we end up with a lock on the root node, or return early
293 : * to avoid blocking.
294 : *
295 : * Return: root extent buffer with read lock held, or ERR_PTR(-EAGAIN).
296 : */
297 0 : struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root)
298 : {
299 0 : struct extent_buffer *eb;
300 :
301 0 : while (1) {
302 0 : eb = btrfs_root_node(root);
303 0 : if (!btrfs_try_tree_read_lock(eb)) {
304 0 : free_extent_buffer(eb);
305 0 : return ERR_PTR(-EAGAIN);
306 : }
307 0 : if (eb == root->node)
308 : break;
309 0 : btrfs_tree_read_unlock(eb);
310 0 : free_extent_buffer(eb);
311 : }
312 : return eb;
313 : }
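A caller-side sketch of the nowait behaviour above (illustrative; how the caller retries is up to it):

	eb = btrfs_try_read_lock_root_node(root);
	if (IS_ERR(eb))
		return PTR_ERR(eb);	/* -EAGAIN: would have blocked */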
314 :
315 : /*
316 : * DREW locks
317 : * ==========
318 : *
319 : * DREW stands for double-reader-writer-exclusion lock. It's used in
320 : * situations where you want to provide A-B exclusion but not AA or BB.
321 : *
322 : * The current implementation gives priority to the reader. If a reader and a
323 : * writer race to acquire their respective sides of the lock, the writer
324 : * yields as soon as it detects a concurrent reader. Additionally, while
325 : * there are pending readers, no new writer is allowed to come in and
326 : * acquire the lock.
327 : */
328 :
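To make the A-B exclusion concrete, a usage sketch with the helpers defined below; either side may be held by several tasks at once, but never by both sides at the same time (illustrative only):

	struct btrfs_drew_lock lock;

	btrfs_drew_lock_init(&lock);

	/* "A" side: excludes writers, shared among readers. */
	btrfs_drew_read_lock(&lock);
	/* ... */
	btrfs_drew_read_unlock(&lock);

	/* "B" side: excludes readers, shared among writers. */
	btrfs_drew_write_lock(&lock);
	/* ... */
	btrfs_drew_write_unlock(&lock);
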
329 0 : void btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
330 : {
331 0 : atomic_set(&lock->readers, 0);
332 0 : atomic_set(&lock->writers, 0);
333 0 : init_waitqueue_head(&lock->pending_readers);
334 0 : init_waitqueue_head(&lock->pending_writers);
335 0 : }
336 :
337 : /* Return true if acquisition is successful, false otherwise */
338 0 : bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
339 : {
340 0 : if (atomic_read(&lock->readers))
341 : return false;
342 :
343 0 : atomic_inc(&lock->writers);
344 :
345 : /* Ensure writers count is updated before we check for pending readers */
346 0 : smp_mb__after_atomic();
347 0 : if (atomic_read(&lock->readers)) {
348 0 : btrfs_drew_write_unlock(lock);
349 0 : return false;
350 : }
351 :
352 : return true;
353 : }
354 :
355 0 : void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
356 : {
357 0 : while (true) {
358 0 : if (btrfs_drew_try_write_lock(lock))
359 0 : return;
360 0 : wait_event(lock->pending_writers, !atomic_read(&lock->readers));
361 : }
362 : }
363 :
364 0 : void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
365 : {
366 0 : atomic_dec(&lock->writers);
367 0 : cond_wake_up(&lock->pending_readers);
368 0 : }
369 :
370 0 : void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
371 : {
372 0 : atomic_inc(&lock->readers);
373 :
374 : /*
375 : * Ensure the pending reader count is perceived BEFORE this reader
376 : * goes to sleep in case of active writers. This guarantees new writers
377 : * won't be allowed and that the current reader will be woken up when
378 : * the last active writer finishes its job.
379 : */
380 0 : smp_mb__after_atomic();
381 :
382 0 : wait_event(lock->pending_readers, atomic_read(&lock->writers) == 0);
383 0 : }
384 :
385 0 : void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
386 : {
387 : /*
388 : * atomic_dec_and_test() implies a full barrier, so woken-up writers
389 : * are guaranteed to see the decrement.
390 : */
391 0 : if (atomic_dec_and_test(&lock->readers))
392 0 : wake_up(&lock->pending_writers);
393 0 : }