/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_BACKING_DEV_DEFS_H
#define __LINUX_BACKING_DEV_DEFS_H

#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/percpu_counter.h>
#include <linux/percpu-refcount.h>
#include <linux/flex_proportions.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/kref.h>
#include <linux/refcount.h>

struct page;
struct device;
struct dentry;

/*
 * Bits in bdi_writeback.state
 */
enum wb_state {
	WB_registered,		/* bdi_register() was done */
	WB_writeback_running,	/* Writeback is in progress */
	WB_has_dirty_io,	/* Dirty inodes on ->b_{dirty|io|more_io} */
	WB_start_all,		/* nr_pages == 0 (all) work pending */
};

enum wb_stat_item {
	WB_RECLAIMABLE,
	WB_WRITEBACK,
	WB_DIRTIED,
	WB_WRITTEN,
	NR_WB_STAT_ITEMS
};

#define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids)))

/*
 * why some writeback work was initiated
 */
enum wb_reason {
	WB_REASON_BACKGROUND,
	WB_REASON_VMSCAN,
	WB_REASON_SYNC,
	WB_REASON_PERIODIC,
	WB_REASON_LAPTOP_TIMER,
	WB_REASON_FS_FREE_SPACE,
	/*
	 * There is no bdi forker thread any more and this work is done
	 * by the emergency worker.  However, the tracepoints are visible
	 * to userland and must keep exposing exactly the same
	 * information, so the mismatched name is retained.
	 */
	WB_REASON_FORKER_THREAD,
	WB_REASON_FOREIGN_FLUSH,

	WB_REASON_MAX,
};

struct wb_completion {
	atomic_t		cnt;
	wait_queue_head_t	*waitq;
};

#define __WB_COMPLETION_INIT(_waitq)	\
	(struct wb_completion){ .cnt = ATOMIC_INIT(1), .waitq = (_waitq) }

/*
 * If one wants to wait for one or more wb_writeback_works, each work's
 * ->done should be set to a wb_completion defined using the following
 * macro.  Once all work items are issued with wb_queue_work(), the caller
 * can wait for the completion of all using wb_wait_for_completion().  Work
 * items which are waited upon aren't freed automatically on completion.
 */
#define WB_COMPLETION_INIT(bdi)		__WB_COMPLETION_INIT(&(bdi)->wb_waitq)

#define DEFINE_WB_COMPLETION(cmpl, bdi)	\
	struct wb_completion cmpl = WB_COMPLETION_INIT(bdi)
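The comment above describes the waiting protocol, but the pieces live elsewhere: wb_wait_for_completion() is declared in <linux/backing-dev.h>, while struct wb_writeback_work, wb_queue_work(), and bdi_split_work_to_wbs() are file-local to fs/fs-writeback.c. The following is a minimal sketch of the pattern, loosely modeled on __writeback_inodes_sb_nr() in that file, with fields trimmed; example_sync_bdi() is a hypothetical name and would only compile inside fs/fs-writeback.c:

/*
 * Illustrative sketch only, not a public API: queue writeback work on
 * every wb of @bdi and wait for all of it to finish.
 */
static void example_sync_bdi(struct super_block *sb,
			     struct backing_dev_info *bdi)
{
	/* ->cnt starts at 1; each queued copy takes a ref, dropped on finish */
	DEFINE_WB_COMPLETION(done, bdi);
	struct wb_writeback_work work = {
		.sb		= sb,
		.sync_mode	= WB_SYNC_NONE,
		.done		= &done,	/* completion to signal */
	};

	/* queues one copy of @work on each wb that has dirty inodes */
	bdi_split_work_to_wbs(bdi, &work, false);

	/* sleeps on bdi->wb_waitq until every queued copy has completed */
	wb_wait_for_completion(&done);
}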
/*
 * Each wb (bdi_writeback) can perform writeback operations, is measured
 * and throttled, independently.  Without cgroup writeback, each bdi
 * (backing_dev_info) is served by its embedded bdi->wb.
 *
 * On the default hierarchy, blkcg implicitly enables memcg.  This allows
 * using memcg's page ownership for attributing writeback IOs, and every
 * memcg - blkcg combination can be served by its own wb by assigning a
 * dedicated wb to each memcg, which enables isolation across different
 * cgroups and propagation of IO back pressure down from the IO layer up
 * to the tasks which are generating the dirty pages to be written back.
 *
 * A cgroup wb is indexed on its bdi by the ID of the associated memcg,
 * refcounted with the number of inodes attached to it, and pins the memcg
 * and the corresponding blkcg.  As the corresponding blkcg for a memcg
 * may change as blkcg is disabled and enabled higher up in the hierarchy,
 * a wb is tested for blkcg after lookup and removed from the index on
 * mismatch so that a new wb for the combination can be created.
 *
 * Each bdi_writeback that is not embedded into the backing_dev_info must
 * hold a reference to the parent backing_dev_info.  See cgwb_create() for
 * details.
 */
struct bdi_writeback {
	struct backing_dev_info *bdi;	/* our parent bdi */

	unsigned long state;		/* Always use atomic bitops on this */
	unsigned long last_old_flush;	/* last old data flush */

	struct list_head b_dirty;	/* dirty inodes */
	struct list_head b_io;		/* parked for writeback */
	struct list_head b_more_io;	/* parked for more writeback */
	struct list_head b_dirty_time;	/* time stamps are dirty */
	spinlock_t list_lock;		/* protects the b_* lists */

	atomic_t writeback_inodes;	/* number of inodes under writeback */
	struct percpu_counter stat[NR_WB_STAT_ITEMS];

	unsigned long bw_time_stamp;	/* last time write bw was updated */
	unsigned long dirtied_stamp;
	unsigned long written_stamp;	/* pages written at bw_time_stamp */
	unsigned long write_bandwidth;	/* the estimated write bandwidth */
	unsigned long avg_write_bandwidth; /* further smoothed write bw, > 0 */

	/*
	 * The base dirty throttle rate, recalculated every 200ms.
	 * All the bdi tasks' dirty rates will be curbed under it.
	 * @dirty_ratelimit tracks the estimated @balanced_dirty_ratelimit
	 * in small steps and is much smoother and more stable than the
	 * latter.
	 */
	unsigned long dirty_ratelimit;
	unsigned long balanced_dirty_ratelimit;

	struct fprop_local_percpu completions;
	int dirty_exceeded;
	enum wb_reason start_all_reason;

	spinlock_t work_lock;		/* protects work_list & dwork scheduling */
	struct list_head work_list;
	struct delayed_work dwork;	/* work item used for writeback */
	struct delayed_work bw_dwork;	/* work item used for bandwidth estimate */

	unsigned long dirty_sleep;	/* last wait */

	struct list_head bdi_node;	/* anchored at bdi->wb_list */

#ifdef CONFIG_CGROUP_WRITEBACK
	struct percpu_ref refcnt;	/* used only for !root wb's */
	struct fprop_local_percpu memcg_completions;
	struct cgroup_subsys_state *memcg_css; /* the associated memcg */
	struct cgroup_subsys_state *blkcg_css; /* and blkcg */
	struct list_head memcg_node;	/* anchored at memcg->cgwb_list */
	struct list_head blkcg_node;	/* anchored at blkcg->cgwb_list */
	struct list_head b_attached;	/* attached inodes, protected by list_lock */
	struct list_head offline_node;	/* anchored at offline_cgwbs */

	union {
		struct work_struct release_work;
		struct rcu_head rcu;
	};
#endif
};
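The stat[] counters above are not touched directly; small wrappers in <linux/backing-dev.h> (inc_wb_stat(), dec_wb_stat(), wb_stat(), wb_stat_sum()) update them in batches of WB_STAT_BATCH to keep the percpu counters cheap. A minimal reader sketch, assuming those wrappers; example_wb_dirty_pages() is a hypothetical name, loosely modeled on the threshold code in mm/page-writeback.c:

#include <linux/backing-dev.h>

/*
 * Hypothetical helper, for illustration only: count the pages this wb
 * currently holds against the dirty limits.  wb_stat() reads a single
 * percpu counter; because updates are batched, the result can be off
 * by up to wb_stat_error() (see <linux/backing-dev.h>).
 */
static unsigned long example_wb_dirty_pages(struct bdi_writeback *wb)
{
	return wb_stat(wb, WB_RECLAIMABLE) + wb_stat(wb, WB_WRITEBACK);
}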
struct backing_dev_info {
	u64 id;
	struct rb_node rb_node;		/* keyed by ->id */
	struct list_head bdi_list;
	unsigned long ra_pages;		/* max readahead in PAGE_SIZE units */
	unsigned long io_pages;		/* max allowed IO size */

	struct kref refcnt;		/* Reference counter for the structure */
	unsigned int capabilities;	/* Device capabilities */
	unsigned int min_ratio;
	unsigned int max_ratio, max_prop_frac;

	/*
	 * Sum of avg_write_bandwidth of wbs with dirty inodes.  > 0 if
	 * there are any dirty wbs, which bdi_has_dirty() depends upon.
	 */
	atomic_long_t tot_write_bandwidth;

	struct bdi_writeback wb;	/* the root writeback info for this bdi */
	struct list_head wb_list;	/* list of all wbs */
#ifdef CONFIG_CGROUP_WRITEBACK
	struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
	struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
	struct rw_semaphore wb_switch_rwsem; /* no cgwb switch while syncing */
#endif
	wait_queue_head_t wb_waitq;

	struct device *dev;
	char dev_name[64];
	struct device *owner;

	struct timer_list laptop_mode_wb_timer;

#ifdef CONFIG_DEBUG_FS
	struct dentry *debug_dir;
#endif
};

struct wb_lock_cookie {
	bool locked;
	unsigned long flags;
};

#ifdef CONFIG_CGROUP_WRITEBACK

/**
 * wb_tryget - try to increment a wb's refcount
 * @wb: bdi_writeback to get
 */
static inline bool wb_tryget(struct bdi_writeback *wb)
{
	if (wb != &wb->bdi->wb)
		return percpu_ref_tryget(&wb->refcnt);
	return true;
}

/**
 * wb_get - increment a wb's refcount
 * @wb: bdi_writeback to get
 */
static inline void wb_get(struct bdi_writeback *wb)
{
	if (wb != &wb->bdi->wb)
		percpu_ref_get(&wb->refcnt);
}

/**
 * wb_put_many - decrement a wb's refcount
 * @wb: bdi_writeback to put
 * @nr: number of references to put
 */
static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
{
	if (WARN_ON_ONCE(!wb->bdi)) {
		/*
		 * A driver bug might cause a file to be removed before bdi
		 * was initialized.
		 */
		return;
	}

	if (wb != &wb->bdi->wb)
		percpu_ref_put_many(&wb->refcnt, nr);
}

/**
 * wb_put - decrement a wb's refcount
 * @wb: bdi_writeback to put
 */
static inline void wb_put(struct bdi_writeback *wb)
{
	wb_put_many(wb, 1);
}

/**
 * wb_dying - is a wb dying?
 * @wb: bdi_writeback of interest
 *
 * Returns whether @wb is unlinked and being drained.
 */
static inline bool wb_dying(struct bdi_writeback *wb)
{
	return percpu_ref_is_dying(&wb->refcnt);
}

#else	/* CONFIG_CGROUP_WRITEBACK */

static inline bool wb_tryget(struct bdi_writeback *wb)
{
	return true;
}

static inline void wb_get(struct bdi_writeback *wb)
{
}

static inline void wb_put(struct bdi_writeback *wb)
{
}

static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
{
}

static inline bool wb_dying(struct bdi_writeback *wb)
{
	return false;
}

#endif	/* CONFIG_CGROUP_WRITEBACK */

#endif	/* __LINUX_BACKING_DEV_DEFS_H */
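Note the asymmetry in the helpers above: the root wb embedded in the bdi is never percpu-refcounted, only cgroup wbs are, which is why every helper first tests wb != &wb->bdi->wb. The usual lookup-and-pin pattern looks roughly like the sketch below, loosely modeled on wb_get_lookup() in mm/backing-dev.c; example_find_wb() is a hypothetical name:

/*
 * Illustrative sketch only: find the wb serving @memcg_css on @bdi and
 * pin it.  The radix tree lookup is done under RCU; wb_tryget() fails
 * once the wb's percpu ref has begun dying, so a NULL return tells the
 * caller to create a fresh wb for the combination.
 */
static struct bdi_writeback *
example_find_wb(struct backing_dev_info *bdi,
		struct cgroup_subsys_state *memcg_css)
{
	struct bdi_writeback *wb;

	rcu_read_lock();
	wb = radix_tree_lookup(&bdi->cgwb_tree, memcg_css->id);
	if (wb && !wb_tryget(wb))
		wb = NULL;	/* dying; treat as not found */
	rcu_read_unlock();

	return wb;	/* caller drops the reference with wb_put() */
}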