OSDN Git Service

perf/x86/uncore: Correct the number of CHAs on EMR
[tomoyo/tomoyo-test1.git] / include / linux / ceph / osd_client.h
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _FS_CEPH_OSD_CLIENT_H
3 #define _FS_CEPH_OSD_CLIENT_H
4
5 #include <linux/bitrev.h>
6 #include <linux/completion.h>
7 #include <linux/kref.h>
8 #include <linux/mempool.h>
9 #include <linux/rbtree.h>
10 #include <linux/refcount.h>
11 #include <linux/ktime.h>
12
13 #include <linux/ceph/types.h>
14 #include <linux/ceph/osdmap.h>
15 #include <linux/ceph/messenger.h>
16 #include <linux/ceph/msgpool.h>
17 #include <linux/ceph/auth.h>
18 #include <linux/ceph/pagelist.h>
19
/*
 * Forward declarations for types that this header refers to by pointer
 * before (or without) defining them.
 */
struct ceph_msg;
struct ceph_snap_context;
struct ceph_osd_request;
struct ceph_osd_client;

/*
 * completion callback for async writepages
 *
 * Takes the struct ceph_osd_request as its only argument and returns
 * nothing.
 */
typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *);
29
/*
 * Negative osd number; parenthesized so the unary minus cannot bind to
 * neighbouring tokens at the expansion site.
 */
#define CEPH_HOMELESS_OSD       (-1)
31
32 /* a given osd we're communicating with */
33 struct ceph_osd {
34         refcount_t o_ref;
35         struct ceph_osd_client *o_osdc;
36         int o_osd;
37         int o_incarnation;
38         struct rb_node o_node;
39         struct ceph_connection o_con;
40         struct rb_root o_requests;
41         struct rb_root o_linger_requests;
42         struct rb_root o_backoff_mappings;
43         struct rb_root o_backoffs_by_id;
44         struct list_head o_osd_lru;
45         struct ceph_auth_handshake o_auth;
46         unsigned long lru_ttl;
47         struct list_head o_keepalive_item;
48         struct mutex lock;
49 };
50
/* Op-count constants for osd requests. */
#define CEPH_OSD_SLAB_OPS       2
#define CEPH_OSD_MAX_OPS        16
53
/*
 * Kind of data carried by a struct ceph_osd_data.
 *
 * NOTE: CEPH_OSD_DATA_TYPE_BIO only exists when CONFIG_BLOCK is defined,
 * so the numeric value of CEPH_OSD_DATA_TYPE_BVECS depends on that option.
 */
enum ceph_osd_data_type {
        CEPH_OSD_DATA_TYPE_NONE = 0,
        CEPH_OSD_DATA_TYPE_PAGES,
        CEPH_OSD_DATA_TYPE_PAGELIST,
#ifdef CONFIG_BLOCK
        CEPH_OSD_DATA_TYPE_BIO,
#endif /* CONFIG_BLOCK */
        CEPH_OSD_DATA_TYPE_BVECS,
};
63
64 struct ceph_osd_data {
65         enum ceph_osd_data_type type;
66         union {
67                 struct {
68                         struct page     **pages;
69                         u64             length;
70                         u32             alignment;
71                         bool            pages_from_pool;
72                         bool            own_pages;
73                 };
74                 struct ceph_pagelist    *pagelist;
75 #ifdef CONFIG_BLOCK
76                 struct {
77                         struct ceph_bio_iter    bio_pos;
78                         u32                     bio_length;
79                 };
80 #endif /* CONFIG_BLOCK */
81                 struct {
82                         struct ceph_bvec_iter   bvec_pos;
83                         u32                     num_bvecs;
84                 };
85         };
86 };
87
88 struct ceph_osd_req_op {
89         u16 op;           /* CEPH_OSD_OP_* */
90         u32 flags;        /* CEPH_OSD_OP_FLAG_* */
91         u32 indata_len;   /* request */
92         u32 outdata_len;  /* reply */
93         s32 rval;
94
95         union {
96                 struct ceph_osd_data raw_data_in;
97                 struct {
98                         u64 offset, length;
99                         u64 truncate_size;
100                         u32 truncate_seq;
101                         struct ceph_osd_data osd_data;
102                 } extent;
103                 struct {
104                         u32 name_len;
105                         u32 value_len;
106                         __u8 cmp_op;       /* CEPH_OSD_CMPXATTR_OP_* */
107                         __u8 cmp_mode;     /* CEPH_OSD_CMPXATTR_MODE_* */
108                         struct ceph_osd_data osd_data;
109                 } xattr;
110                 struct {
111                         const char *class_name;
112                         const char *method_name;
113                         struct ceph_osd_data request_info;
114                         struct ceph_osd_data request_data;
115                         struct ceph_osd_data response_data;
116                         __u8 class_len;
117                         __u8 method_len;
118                         u32 indata_len;
119                 } cls;
120                 struct {
121                         u64 cookie;
122                         __u8 op;           /* CEPH_OSD_WATCH_OP_ */
123                         u32 gen;
124                 } watch;
125                 struct {
126                         struct ceph_osd_data request_data;
127                 } notify_ack;
128                 struct {
129                         u64 cookie;
130                         struct ceph_osd_data request_data;
131                         struct ceph_osd_data response_data;
132                 } notify;
133                 struct {
134                         struct ceph_osd_data response_data;
135                 } list_watchers;
136                 struct {
137                         u64 expected_object_size;
138                         u64 expected_write_size;
139                         u32 flags;  /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
140                 } alloc_hint;
141                 struct {
142                         u64 snapid;
143                         u64 src_version;
144                         u8 flags;
145                         u32 src_fadvise_flags;
146                         struct ceph_osd_data osd_data;
147                 } copy_from;
148         };
149 };
150
151 struct ceph_osd_request_target {
152         struct ceph_object_id base_oid;
153         struct ceph_object_locator base_oloc;
154         struct ceph_object_id target_oid;
155         struct ceph_object_locator target_oloc;
156
157         struct ceph_pg pgid;               /* last raw pg we mapped to */
158         struct ceph_spg spgid;             /* last actual spg we mapped to */
159         u32 pg_num;
160         u32 pg_num_mask;
161         struct ceph_osds acting;
162         struct ceph_osds up;
163         int size;
164         int min_size;
165         bool sort_bitwise;
166         bool recovery_deletes;
167
168         unsigned int flags;                /* CEPH_OSD_FLAG_* */
169         bool used_replica;
170         bool paused;
171
172         u32 epoch;
173         u32 last_force_resend;
174
175         int osd;
176 };
177
178 /* an in-flight request */
179 struct ceph_osd_request {
180         u64             r_tid;              /* unique for this client */
181         struct rb_node  r_node;
182         struct rb_node  r_mc_node;          /* map check */
183         struct work_struct r_complete_work;
184         struct ceph_osd *r_osd;
185
186         struct ceph_osd_request_target r_t;
187 #define r_base_oid      r_t.base_oid
188 #define r_base_oloc     r_t.base_oloc
189 #define r_flags         r_t.flags
190
191         struct ceph_msg  *r_request, *r_reply;
192         u32               r_sent;      /* >0 if r_request is sending/sent */
193
194         /* request osd ops array  */
195         unsigned int            r_num_ops;
196
197         int               r_result;
198
199         struct ceph_osd_client *r_osdc;
200         struct kref       r_kref;
201         bool              r_mempool;
202         struct completion r_completion;       /* private to osd_client.c */
203         ceph_osdc_callback_t r_callback;
204
205         struct inode *r_inode;                /* for use by callbacks */
206         struct list_head r_private_item;      /* ditto */
207         void *r_priv;                         /* ditto */
208
209         /* set by submitter */
210         u64 r_snapid;                         /* for reads, CEPH_NOSNAP o/w */
211         struct ceph_snap_context *r_snapc;    /* for writes */
212         struct timespec64 r_mtime;            /* ditto */
213         u64 r_data_offset;                    /* ditto */
214         bool r_linger;                        /* don't resend on failure */
215
216         /* internal */
217         unsigned long r_stamp;                /* jiffies, send or check time */
218         unsigned long r_start_stamp;          /* jiffies */
219         ktime_t r_start_latency;              /* ktime_t */
220         ktime_t r_end_latency;                /* ktime_t */
221         int r_attempts;
222         u32 r_map_dne_bound;
223
224         struct ceph_osd_req_op r_ops[];
225 };
226
227 struct ceph_request_redirect {
228         struct ceph_object_locator oloc;
229 };
230
231 /*
232  * osd request identifier
233  *
234  * caller name + incarnation# + tid to unique identify this request
235  */
236 struct ceph_osd_reqid {
237         struct ceph_entity_name name;
238         __le64 tid;
239         __le32 inc;
240 } __packed;
241
242 struct ceph_blkin_trace_info {
243         __le64 trace_id;
244         __le64 span_id;
245         __le64 parent_span_id;
246 } __packed;
247
248 typedef void (*rados_watchcb2_t)(void *arg, u64 notify_id, u64 cookie,
249                                  u64 notifier_id, void *data, size_t data_len);
250 typedef void (*rados_watcherrcb_t)(void *arg, u64 cookie, int err);
251
252 struct ceph_osd_linger_request {
253         struct ceph_osd_client *osdc;
254         u64 linger_id;
255         bool committed;
256         bool is_watch;                  /* watch or notify */
257
258         struct ceph_osd *osd;
259         struct ceph_osd_request *reg_req;
260         struct ceph_osd_request *ping_req;
261         unsigned long ping_sent;
262         unsigned long watch_valid_thru;
263         struct list_head pending_lworks;
264
265         struct ceph_osd_request_target t;
266         u32 map_dne_bound;
267
268         struct timespec64 mtime;
269
270         struct kref kref;
271         struct mutex lock;
272         struct rb_node node;            /* osd */
273         struct rb_node osdc_node;       /* osdc */
274         struct rb_node mc_node;         /* map check */
275         struct list_head scan_item;
276
277         struct completion reg_commit_wait;
278         struct completion notify_finish_wait;
279         int reg_commit_error;
280         int notify_finish_error;
281         int last_error;
282
283         u32 register_gen;
284         u64 notify_id;
285
286         rados_watchcb2_t wcb;
287         rados_watcherrcb_t errcb;
288         void *data;
289
290         struct ceph_pagelist *request_pl;
291         struct page **notify_id_pages;
292
293         struct page ***preply_pages;
294         size_t *preply_len;
295 };
296
297 struct ceph_watch_item {
298         struct ceph_entity_name name;
299         u64 cookie;
300         struct ceph_entity_addr addr;
301 };
302
303 struct ceph_spg_mapping {
304         struct rb_node node;
305         struct ceph_spg spgid;
306
307         struct rb_root backoffs;
308 };
309
310 struct ceph_hobject_id {
311         void *key;
312         size_t key_len;
313         void *oid;
314         size_t oid_len;
315         u64 snapid;
316         u32 hash;
317         u8 is_max;
318         void *nspace;
319         size_t nspace_len;
320         s64 pool;
321
322         /* cache */
323         u32 hash_reverse_bits;
324 };
325
326 static inline void ceph_hoid_build_hash_cache(struct ceph_hobject_id *hoid)
327 {
328         hoid->hash_reverse_bits = bitrev32(hoid->hash);
329 }
330
331 /*
332  * PG-wide backoff: [begin, end)
333  * per-object backoff: begin == end
334  */
335 struct ceph_osd_backoff {
336         struct rb_node spg_node;
337         struct rb_node id_node;
338
339         struct ceph_spg spgid;
340         u64 id;
341         struct ceph_hobject_id *begin;
342         struct ceph_hobject_id *end;
343 };
344
/* 64-bit constant with only the top 16 bits set. */
#define CEPH_LINGER_ID_START    0xffff000000000000ULL
346
347 struct ceph_osd_client {
348         struct ceph_client     *client;
349
350         struct ceph_osdmap     *osdmap;       /* current map */
351         struct rw_semaphore    lock;
352
353         struct rb_root         osds;          /* osds */
354         struct list_head       osd_lru;       /* idle osds */
355         spinlock_t             osd_lru_lock;
356         u32                    epoch_barrier;
357         struct ceph_osd        homeless_osd;
358         atomic64_t             last_tid;      /* tid of last request */
359         u64                    last_linger_id;
360         struct rb_root         linger_requests; /* lingering requests */
361         struct rb_root         map_checks;
362         struct rb_root         linger_map_checks;
363         atomic_t               num_requests;
364         atomic_t               num_homeless;
365         int                    abort_err;
366         struct delayed_work    timeout_work;
367         struct delayed_work    osds_timeout_work;
368 #ifdef CONFIG_DEBUG_FS
369         struct dentry          *debugfs_file;
370 #endif
371
372         mempool_t              *req_mempool;
373
374         struct ceph_msgpool     msgpool_op;
375         struct ceph_msgpool     msgpool_op_reply;
376
377         struct workqueue_struct *notify_wq;
378         struct workqueue_struct *completion_wq;
379 };
380
381 static inline bool ceph_osdmap_flag(struct ceph_osd_client *osdc, int flag)
382 {
383         return osdc->osdmap->flags & flag;
384 }
385
386 extern int ceph_osdc_setup(void);
387 extern void ceph_osdc_cleanup(void);
388
389 extern int ceph_osdc_init(struct ceph_osd_client *osdc,
390                           struct ceph_client *client);
391 extern void ceph_osdc_stop(struct ceph_osd_client *osdc);
392 extern void ceph_osdc_reopen_osds(struct ceph_osd_client *osdc);
393
394 extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc,
395                                    struct ceph_msg *msg);
396 extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc,
397                                  struct ceph_msg *msg);
398 void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb);
399 void ceph_osdc_abort_requests(struct ceph_osd_client *osdc, int err);
400 void ceph_osdc_clear_abort_err(struct ceph_osd_client *osdc);
401
/*
 * Evaluate to the address of member @fld inside union arm @typ of op number
 * @whch of request @oreq.
 *
 * @oreq and @whch are each evaluated exactly once (copied into locals).
 * BUG_ON() triggers when @whch is not below the request's r_num_ops.
 * Implemented as a GNU statement expression, so the macro yields a value.
 */
#define osd_req_op_data(oreq, whch, typ, fld)                           \
({                                                                      \
        struct ceph_osd_request *__oreq = (oreq);                       \
        unsigned int __whch = (whch);                                   \
        BUG_ON(__whch >= __oreq->r_num_ops);                            \
        &__oreq->r_ops[__whch].typ.fld;                                 \
})
409
410 struct ceph_osd_req_op *osd_req_op_init(struct ceph_osd_request *osd_req,
411                             unsigned int which, u16 opcode, u32 flags);
412
413 extern void osd_req_op_raw_data_in_pages(struct ceph_osd_request *,
414                                         unsigned int which,
415                                         struct page **pages, u64 length,
416                                         u32 alignment, bool pages_from_pool,
417                                         bool own_pages);
418
419 extern void osd_req_op_extent_init(struct ceph_osd_request *osd_req,
420                                         unsigned int which, u16 opcode,
421                                         u64 offset, u64 length,
422                                         u64 truncate_size, u32 truncate_seq);
423 extern void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
424                                         unsigned int which, u64 length);
425 extern void osd_req_op_extent_dup_last(struct ceph_osd_request *osd_req,
426                                        unsigned int which, u64 offset_inc);
427
428 extern struct ceph_osd_data *osd_req_op_extent_osd_data(
429                                         struct ceph_osd_request *osd_req,
430                                         unsigned int which);
431
432 extern void osd_req_op_extent_osd_data_pages(struct ceph_osd_request *,
433                                         unsigned int which,
434                                         struct page **pages, u64 length,
435                                         u32 alignment, bool pages_from_pool,
436                                         bool own_pages);
437 extern void osd_req_op_extent_osd_data_pagelist(struct ceph_osd_request *,
438                                         unsigned int which,
439                                         struct ceph_pagelist *pagelist);
440 #ifdef CONFIG_BLOCK
441 void osd_req_op_extent_osd_data_bio(struct ceph_osd_request *osd_req,
442                                     unsigned int which,
443                                     struct ceph_bio_iter *bio_pos,
444                                     u32 bio_length);
445 #endif /* CONFIG_BLOCK */
446 void osd_req_op_extent_osd_data_bvecs(struct ceph_osd_request *osd_req,
447                                       unsigned int which,
448                                       struct bio_vec *bvecs, u32 num_bvecs,
449                                       u32 bytes);
450 void osd_req_op_extent_osd_data_bvec_pos(struct ceph_osd_request *osd_req,
451                                          unsigned int which,
452                                          struct ceph_bvec_iter *bvec_pos);
453
454 extern void osd_req_op_cls_request_data_pagelist(struct ceph_osd_request *,
455                                         unsigned int which,
456                                         struct ceph_pagelist *pagelist);
457 extern void osd_req_op_cls_request_data_pages(struct ceph_osd_request *,
458                                         unsigned int which,
459                                         struct page **pages, u64 length,
460                                         u32 alignment, bool pages_from_pool,
461                                         bool own_pages);
462 void osd_req_op_cls_request_data_bvecs(struct ceph_osd_request *osd_req,
463                                        unsigned int which,
464                                        struct bio_vec *bvecs, u32 num_bvecs,
465                                        u32 bytes);
466 extern void osd_req_op_cls_response_data_pages(struct ceph_osd_request *,
467                                         unsigned int which,
468                                         struct page **pages, u64 length,
469                                         u32 alignment, bool pages_from_pool,
470                                         bool own_pages);
471 int osd_req_op_cls_init(struct ceph_osd_request *osd_req, unsigned int which,
472                         const char *class, const char *method);
473 extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int which,
474                                  u16 opcode, const char *name, const void *value,
475                                  size_t size, u8 cmp_op, u8 cmp_mode);
476 extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
477                                        unsigned int which,
478                                        u64 expected_object_size,
479                                        u64 expected_write_size,
480                                        u32 flags);
481 extern int osd_req_op_copy_from_init(struct ceph_osd_request *req,
482                                      u64 src_snapid, u64 src_version,
483                                      struct ceph_object_id *src_oid,
484                                      struct ceph_object_locator *src_oloc,
485                                      u32 src_fadvise_flags,
486                                      u32 dst_fadvise_flags,
487                                      u32 truncate_seq, u64 truncate_size,
488                                      u8 copy_from_flags);
489
490 extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
491                                                struct ceph_snap_context *snapc,
492                                                unsigned int num_ops,
493                                                bool use_mempool,
494                                                gfp_t gfp_flags);
495 int ceph_osdc_alloc_messages(struct ceph_osd_request *req, gfp_t gfp);
496
497 extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *,
498                                       struct ceph_file_layout *layout,
499                                       struct ceph_vino vino,
500                                       u64 offset, u64 *len,
501                                       unsigned int which, int num_ops,
502                                       int opcode, int flags,
503                                       struct ceph_snap_context *snapc,
504                                       u32 truncate_seq, u64 truncate_size,
505                                       bool use_mempool);
506
507 extern void ceph_osdc_get_request(struct ceph_osd_request *req);
508 extern void ceph_osdc_put_request(struct ceph_osd_request *req);
509
510 void ceph_osdc_start_request(struct ceph_osd_client *osdc,
511                              struct ceph_osd_request *req);
512 extern void ceph_osdc_cancel_request(struct ceph_osd_request *req);
513 extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc,
514                                   struct ceph_osd_request *req);
515 extern void ceph_osdc_sync(struct ceph_osd_client *osdc);
516
517 extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc);
518 void ceph_osdc_maybe_request_map(struct ceph_osd_client *osdc);
519
520 int ceph_osdc_call(struct ceph_osd_client *osdc,
521                    struct ceph_object_id *oid,
522                    struct ceph_object_locator *oloc,
523                    const char *class, const char *method,
524                    unsigned int flags,
525                    struct page *req_page, size_t req_len,
526                    struct page **resp_pages, size_t *resp_len);
527
528 /* watch/notify */
529 struct ceph_osd_linger_request *
530 ceph_osdc_watch(struct ceph_osd_client *osdc,
531                 struct ceph_object_id *oid,
532                 struct ceph_object_locator *oloc,
533                 rados_watchcb2_t wcb,
534                 rados_watcherrcb_t errcb,
535                 void *data);
536 int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
537                       struct ceph_osd_linger_request *lreq);
538
539 int ceph_osdc_notify_ack(struct ceph_osd_client *osdc,
540                          struct ceph_object_id *oid,
541                          struct ceph_object_locator *oloc,
542                          u64 notify_id,
543                          u64 cookie,
544                          void *payload,
545                          u32 payload_len);
546 int ceph_osdc_notify(struct ceph_osd_client *osdc,
547                      struct ceph_object_id *oid,
548                      struct ceph_object_locator *oloc,
549                      void *payload,
550                      u32 payload_len,
551                      u32 timeout,
552                      struct page ***preply_pages,
553                      size_t *preply_len);
554 int ceph_osdc_watch_check(struct ceph_osd_client *osdc,
555                           struct ceph_osd_linger_request *lreq);
556 int ceph_osdc_list_watchers(struct ceph_osd_client *osdc,
557                             struct ceph_object_id *oid,
558                             struct ceph_object_locator *oloc,
559                             struct ceph_watch_item **watchers,
560                             u32 *num_watchers);
561 #endif
562