/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_BLOCK_GROUP_H
#define BTRFS_BLOCK_GROUP_H

#include "free-space-cache.h"

enum btrfs_disk_cache_state {
	BTRFS_DC_WRITTEN,
	BTRFS_DC_ERROR,
	BTRFS_DC_CLEAR,
	BTRFS_DC_SETUP,
};

enum btrfs_block_group_size_class {
	/* Unset */
	BTRFS_BG_SZ_NONE,
	/* 0 < size <= 128K */
	BTRFS_BG_SZ_SMALL,
	/* 128K < size <= 8M */
	BTRFS_BG_SZ_MEDIUM,
	/* 8M < size < BG_LENGTH */
	BTRFS_BG_SZ_LARGE,
};
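
/*
 * A minimal sketch (not the kernel's implementation) of how a size could map
 * to the classes above, assuming the 128K and 8M boundaries documented in the
 * comments and the SZ_* helpers from <linux/sizes.h>.  The helper name
 * example_bg_size_to_class() is hypothetical; the real mapping is done by
 * btrfs_calc_block_group_size_class(), declared later in this header.
 */
static inline enum btrfs_block_group_size_class example_bg_size_to_class(u64 size)
{
	if (size == 0)
		return BTRFS_BG_SZ_NONE;	/* unset */
	if (size <= SZ_128K)
		return BTRFS_BG_SZ_SMALL;
	if (size <= SZ_8M)
		return BTRFS_BG_SZ_MEDIUM;
	return BTRFS_BG_SZ_LARGE;
}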

/*
 * This describes the state of the block_group for async discard.  It is
 * needed because of the two-pass nature of async discard, where extent
 * discarding is prioritized over bitmap discarding.
 * BTRFS_DISCARD_RESET_CURSOR is set when we are resetting between lists to
 * prevent contention for discard state variables (e.g. discard_cursor).
 */
enum btrfs_discard_state {
	BTRFS_DISCARD_EXTENTS,
	BTRFS_DISCARD_BITMAPS,
	BTRFS_DISCARD_RESET_CURSOR,
};

/*
 * Control flags for do_chunk_alloc's force field.
 *
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk if we really need one.
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one if we have very few
 * chunks already allocated.  This is used as part of the clustering code to
 * help make sure we have a good pool of storage to cluster in, without filling
 * the FS with empty chunks.
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one.
 *
 * CHUNK_ALLOC_FORCE_FOR_EXTENT is like CHUNK_ALLOC_FORCE but is called from
 * find_free_extent() and also activates the zone.
 */
enum btrfs_chunk_alloc_enum {
	CHUNK_ALLOC_NO_FORCE,
	CHUNK_ALLOC_LIMITED,
	CHUNK_ALLOC_FORCE,
	CHUNK_ALLOC_FORCE_FOR_EXTENT,
};

/* Block group flags set at runtime */
enum btrfs_block_group_flags {
	BLOCK_GROUP_FLAG_IREF,
	BLOCK_GROUP_FLAG_REMOVED,
	BLOCK_GROUP_FLAG_TO_COPY,
	BLOCK_GROUP_FLAG_RELOCATING_REPAIR,
	BLOCK_GROUP_FLAG_CHUNK_ITEM_INSERTED,
	BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE,
	BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
	/* Does the block group need to be added to the free space tree? */
	BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
	/* Indicate that the block group is placed on a sequential zone */
	BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
	/*
	 * Indicate that block group is in the list of new block groups of a
	 * transaction.
	 */
	BLOCK_GROUP_FLAG_NEW,
};

enum btrfs_caching_type {
	BTRFS_CACHE_NO,
	BTRFS_CACHE_STARTED,
	BTRFS_CACHE_FINISHED,
	BTRFS_CACHE_ERROR,
};

struct btrfs_caching_control {
	struct list_head list;
	struct mutex mutex;
	wait_queue_head_t wait;
	struct btrfs_work work;
	struct btrfs_block_group *block_group;
	refcount_t count;
};

/* Once caching_thread() finds this much free space, it will wake up waiters. */
#define CACHING_CTL_WAKE_UP SZ_2M

struct btrfs_block_group {
	struct btrfs_fs_info *fs_info;
	struct inode *inode;
	spinlock_t lock;
	u64 start;
	u64 length;
	u64 pinned;
	u64 reserved;
	u64 used;
	u64 delalloc_bytes;
	u64 bytes_super;
	u64 flags;
	u64 cache_generation;
	u64 global_root_id;

	/*
	 * The last committed used bytes of this block group.  If the above
	 * @used is still the same as @commit_used, we don't need to update the
	 * block group item of this block group.
	 */
	u64 commit_used;
	/*
	 * If the free space extent count exceeds this number, convert the block
	 * group to bitmaps.
	 */
	u32 bitmap_high_thresh;

	/*
	 * If the free space extent count drops below this number, convert the
	 * block group back to extents.
	 */
	u32 bitmap_low_thresh;

	/*
	 * Used only for the delayed data space allocation, because only the
	 * data space allocation and the related metadata updates can be done
	 * across transactions.
	 */
	struct rw_semaphore data_rwsem;

	/* For raid56, this is a full stripe, without parity */
	unsigned long full_stripe_len;
	unsigned long runtime_flags;

	unsigned int ro;

	int disk_cache_state;

	/* Cache tracking stuff */
	int cached;
	struct btrfs_caching_control *caching_ctl;

	struct btrfs_space_info *space_info;

	/* Free space cache stuff */
	struct btrfs_free_space_ctl *free_space_ctl;

	/* Block group cache stuff */
	struct rb_node cache_node;

	/* For block groups in the same raid type */
	struct list_head list;

	refcount_t refs;

	/*
	 * List of struct btrfs_free_cluster for this block group.
	 * Today it will only have one entry on it, but that may change.
	 */
	struct list_head cluster_list;

	/*
	 * Used for several lists:
	 *
	 * 1) struct btrfs_fs_info::unused_bgs
	 * 2) struct btrfs_fs_info::reclaim_bgs
	 * 3) struct btrfs_transaction::deleted_bgs
	 * 4) struct btrfs_trans_handle::new_bgs
	 */
	struct list_head bg_list;

	/* For read-only block groups */
	struct list_head ro_list;

	/*
	 * When non-zero it means the block group's logical address and its
	 * device extents can not be reused for future block group allocations
	 * until the counter goes down to 0. This is to prevent them from being
	 * reused while some task is still using the block group after it was
	 * deleted - we want to make sure they can only be reused for new block
	 * groups after that task is done with the deleted block group.
	 */
	atomic_t frozen;

	/* For discard operations */
	struct list_head discard_list;
	int discard_index;
	u64 discard_eligible_time;
	u64 discard_cursor;
	enum btrfs_discard_state discard_state;

	/* For dirty block groups */
	struct list_head dirty_list;
	struct list_head io_list;

	struct btrfs_io_ctl io_ctl;

	/*
	 * Incremented when doing extent allocations and holding a read lock
	 * on the space_info's groups_sem semaphore.
	 * Decremented when an ordered extent that represents an IO against this
	 * block group's range is created (after it's added to its inode's
	 * root's list of ordered extents) or immediately after the allocation
	 * if it's a metadata extent or fallocate extent (for these cases we
	 * don't create ordered extents).
	 */
	atomic_t reservations;

	/*
	 * Incremented while holding the spinlock *lock* by a task checking if
	 * it can perform a nocow write (incremented if the value for the *ro*
	 * field is 0). Decremented by such tasks once they create an ordered
	 * extent or before that if some error happens before reaching that step.
	 * This is to prevent races between block group relocation and nocow
	 * writes through direct IO.
	 */
	atomic_t nocow_writers;

	/* Lock for free space tree operations. */
	struct mutex free_space_lock;

	/*
	 * Number of extents in this block group used for swap files.
	 * All accesses protected by the spinlock 'lock'.
	 */
	int swap_extents;

	/*
	 * Allocation offset for the block group to implement sequential
	 * allocation. This is used only on a zoned filesystem.
	 */
	u64 alloc_offset;
	u64 zone_unusable;
	u64 zone_capacity;
	u64 meta_write_pointer;
	struct map_lookup *physical_map;
	struct list_head active_bg_list;
	struct work_struct zone_finish_work;
	struct extent_buffer *last_eb;
	enum btrfs_block_group_size_class size_class;
};

static inline u64 btrfs_block_group_end(struct btrfs_block_group *block_group)
{
	return (block_group->start + block_group->length);
}
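
/*
 * Example usage (a sketch, not a kernel helper): test whether a logical
 * address falls inside a block group's range using btrfs_block_group_end().
 * The helper name example_block_group_contains() is hypothetical.
 */
static inline bool example_block_group_contains(struct btrfs_block_group *bg,
						u64 bytenr)
{
	return bytenr >= bg->start && bytenr < btrfs_block_group_end(bg);
}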

static inline bool btrfs_is_block_group_data_only(
					struct btrfs_block_group *block_group)
{
	/*
	 * In mixed mode the fragmentation is expected to be high, lowering the
	 * efficiency, so only proper data block groups are considered.
	 */
	return (block_group->flags & BTRFS_BLOCK_GROUP_DATA) &&
	       !(block_group->flags & BTRFS_BLOCK_GROUP_METADATA);
}

#ifdef CONFIG_BTRFS_DEBUG
int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group);
#endif

struct btrfs_block_group *btrfs_lookup_first_block_group(
		struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group *btrfs_lookup_block_group(
		struct btrfs_fs_info *info, u64 bytenr);
struct btrfs_block_group *btrfs_next_block_group(
		struct btrfs_block_group *cache);
void btrfs_get_block_group(struct btrfs_block_group *cache);
void btrfs_put_block_group(struct btrfs_block_group *cache);
void btrfs_dec_block_group_reservations(struct btrfs_fs_info *fs_info,
					const u64 start);
void btrfs_wait_block_group_reservations(struct btrfs_block_group *bg);
struct btrfs_block_group *btrfs_inc_nocow_writers(struct btrfs_fs_info *fs_info,
						  u64 bytenr);
void btrfs_dec_nocow_writers(struct btrfs_block_group *bg);
void btrfs_wait_nocow_writers(struct btrfs_block_group *bg);
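
/*
 * Illustrative pairing (a sketch, not kernel code): a task that wants to do a
 * NOCOW write takes a reference on the target block group's nocow_writers
 * counter and drops it once its ordered extent exists or on error.  The name
 * example_try_nocow_write() is hypothetical; error handling and the actual
 * write are omitted.
 */
static inline bool example_try_nocow_write(struct btrfs_fs_info *fs_info,
					   u64 bytenr)
{
	struct btrfs_block_group *bg = btrfs_inc_nocow_writers(fs_info, bytenr);

	if (!bg)
		return false;	/* NOCOW not allowed here, fall back to COW */
	/* ... perform the write and create the ordered extent ... */
	btrfs_dec_nocow_writers(bg);
	return true;
}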
void btrfs_wait_block_group_cache_progress(struct btrfs_block_group *cache,
					   u64 num_bytes);
int btrfs_cache_block_group(struct btrfs_block_group *cache, bool wait);
void btrfs_put_caching_control(struct btrfs_caching_control *ctl);
struct btrfs_caching_control *btrfs_get_caching_control(
		struct btrfs_block_group *cache);
int add_new_free_space(struct btrfs_block_group *block_group,
		       u64 start, u64 end, u64 *total_added_ret);
struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
				struct btrfs_fs_info *fs_info,
				const u64 chunk_offset);
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     u64 group_start, struct extent_map *em);
void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_unused(struct btrfs_block_group *bg);
void btrfs_reclaim_bgs_work(struct work_struct *work);
void btrfs_reclaim_bgs(struct btrfs_fs_info *fs_info);
void btrfs_mark_bg_to_reclaim(struct btrfs_block_group *bg);
int btrfs_read_block_groups(struct btrfs_fs_info *info);
struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *trans,
						 u64 type,
						 u64 chunk_offset, u64 size);
void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans);
int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
			     bool do_chunk_alloc);
void btrfs_dec_block_group_ro(struct btrfs_block_group *cache);
int btrfs_start_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans);
int btrfs_setup_space_cache(struct btrfs_trans_handle *trans);
int btrfs_update_block_group(struct btrfs_trans_handle *trans,
			     u64 bytenr, u64 num_bytes, bool alloc);
int btrfs_add_reserved_bytes(struct btrfs_block_group *cache,
			     u64 ram_bytes, u64 num_bytes, int delalloc,
			     bool force_wrong_size_class);
void btrfs_free_reserved_bytes(struct btrfs_block_group *cache,
			       u64 num_bytes, int delalloc);
int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
		      enum btrfs_chunk_alloc_enum force);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, u64 type);
void check_system_chunk(struct btrfs_trans_handle *trans, const u64 type);
void btrfs_reserve_chunk_metadata(struct btrfs_trans_handle *trans,
				  bool is_item_insertion);
u64 btrfs_get_alloc_profile(struct btrfs_fs_info *fs_info, u64 orig_flags);
void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
int btrfs_free_block_groups(struct btrfs_fs_info *info);
int btrfs_rmap_block(struct btrfs_fs_info *fs_info, u64 chunk_start,
		     u64 physical, u64 **logical, int *naddrs, int *stripe_len);

static inline u64 btrfs_data_alloc_profile(struct btrfs_fs_info *fs_info)
{
	return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_DATA);
}
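
/*
 * Illustrative call site (a sketch, not taken from the kernel): ask for a new
 * data chunk while letting the allocator decide whether one is really needed
 * (CHUNK_ALLOC_NO_FORCE).  The name example_maybe_alloc_data_chunk() is
 * hypothetical.
 */
static inline int example_maybe_alloc_data_chunk(struct btrfs_trans_handle *trans,
						 struct btrfs_fs_info *fs_info)
{
	return btrfs_chunk_alloc(trans, btrfs_data_alloc_profile(fs_info),
				 CHUNK_ALLOC_NO_FORCE);
}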

static inline u64 btrfs_metadata_alloc_profile(struct btrfs_fs_info *fs_info)
{
	return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_METADATA);
}

static inline u64 btrfs_system_alloc_profile(struct btrfs_fs_info *fs_info)
{
	return btrfs_get_alloc_profile(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
}

static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED ||
	       cache->cached == BTRFS_CACHE_ERROR;
}
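
/*
 * Sketch (illustrative only, hypothetical helper): make sure a block group's
 * free space has been cached before searching it, waiting for the caching
 * thread if it has not finished yet.
 */
static inline int example_ensure_block_group_cached(struct btrfs_block_group *cache)
{
	if (btrfs_block_group_done(cache))
		return 0;
	return btrfs_cache_block_group(cache, true);
}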

void btrfs_freeze_block_group(struct btrfs_block_group *cache);
void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
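
/*
 * Illustrative pairing (a sketch, not kernel code): freeze a block group while
 * a task still needs its logical range after removal, so the range and device
 * extents are not reused until the last freezer is done.  The name
 * example_use_removed_bg() is hypothetical.
 */
static inline void example_use_removed_bg(struct btrfs_block_group *bg)
{
	btrfs_freeze_block_group(bg);
	/* ... safely use bg->start / bg->length here ... */
	btrfs_unfreeze_block_group(bg);
}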

bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);

enum btrfs_block_group_size_class btrfs_calc_block_group_size_class(u64 size);
int btrfs_use_block_group_size_class(struct btrfs_block_group *bg,
				     enum btrfs_block_group_size_class size_class,
				     bool force_wrong_size_class);
bool btrfs_block_group_should_use_size_class(struct btrfs_block_group *bg);

#endif /* BTRFS_BLOCK_GROUP_H */