1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
|
/* SPDX-License-Identifier: GPL-2.0 */
/*
* linux/include/linux/sunrpc/svc.h
*
* RPC server declarations.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
#ifndef SUNRPC_SVC_H
#define SUNRPC_SVC_H
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/svcauth.h>
#include <linux/lwq.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/folio_batch.h>
#include <linux/kthread.h>
/*
*
* RPC service thread pool.
*
* Pool of threads and temporary sockets. Generally there is only
* a single one of these per RPC service, but on NUMA machines those
* services that can benefit from it (i.e. nfs but not lockd) will
* have one pool per NUMA node. This optimisation reduces cross-
* node traffic on multi-node NUMA NFS servers.
*/
struct svc_pool {
unsigned int sp_id; /* pool id; also node id on NUMA */
unsigned int sp_nrthreads; /* # of threads currently running in pool */
unsigned int sp_nrthrmin; /* Min number of threads to run per pool */
unsigned int sp_nrthrmax; /* Max requested number of threads in pool */
struct lwq sp_xprts; /* pending transports */
struct list_head sp_all_threads; /* all server threads */
struct llist_head sp_idle_threads; /* idle server threads */
/* statistics on pool operation */
struct percpu_counter sp_messages_arrived;
struct percpu_counter sp_sockets_queued;
struct percpu_counter sp_threads_woken;
unsigned long sp_flags;
} ____cacheline_aligned_in_smp;
/* bits for sp_flags */
enum {
SP_TASK_PENDING, /* still work to do even if no xprt is queued */
SP_NEED_VICTIM, /* One thread needs to agree to exit */
SP_VICTIM_REMAINS, /* One thread needs to actually exit */
SP_TASK_STARTING, /* Task has started but not added to idle yet */
};
/*
* RPC service.
*
* An RPC service is a ``daemon,'' possibly multithreaded, which
* receives and processes incoming RPC messages.
* It has one or more transport sockets associated with it, and maintains
* a list of idle threads waiting for input.
*
* We currently do not support more than one RPC program per daemon.
*/
struct svc_serv {
struct svc_program * sv_programs; /* RPC programs */
struct svc_stat * sv_stats; /* RPC statistics */
spinlock_t sv_lock;
unsigned int sv_nprogs; /* Number of sv_programs */
unsigned int sv_nrthreads; /* # of running server threads */
unsigned int sv_max_payload; /* datagram payload size */
unsigned int sv_max_mesg; /* max_payload + 1 page for overheads */
unsigned int sv_xdrsize; /* XDR buffer size */
struct list_head sv_permsocks; /* all permanent sockets */
struct list_head sv_tempsocks; /* all temporary sockets */
int sv_tmpcnt; /* count of temporary "valid" sockets */
struct timer_list sv_temptimer; /* timer for aging temporary sockets */
char * sv_name; /* service name */
unsigned int sv_nrpools; /* number of thread pools */
bool sv_is_pooled; /* is this a pooled service? */
struct svc_pool * sv_pools; /* array of thread pools */
int (*sv_threadfn)(void *data);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct lwq sv_cb_list; /* queue for callback requests
* that arrive over the same
* connection */
bool sv_bc_enabled; /* service uses backchannel */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
};
/* This is used by pool_stats to find and lock an svc */
struct svc_info {
struct svc_serv *serv;
struct mutex *mutex;
};
void svc_destroy(struct svc_serv **svcp);
/*
* Maximum payload size supported by a kernel RPC server.
* This is use to determine the max number of pages nfsd is
* willing to return in a single READ operation.
*
* These happen to all be powers of 2, which is not strictly
* necessary but helps enforce the real limitation, which is
* that they should be multiples of PAGE_SIZE.
*
* For UDP transports, a block plus NFS,RPC, and UDP headers
* has to fit into the IP datagram limit of 64K. The largest
* feasible number for all known page sizes is probably 48K,
* but we choose 32K here. This is the same as the historical
* Linux limit; someone who cares more about NFS/UDP performance
* can test a larger number.
*
* For non-UDP transports we have more freedom. A size of 4MB is
* chosen to accommodate clients that support larger I/O sizes.
*/
enum {
RPCSVC_MAXPAYLOAD = 4 * 1024 * 1024,
RPCSVC_MAXPAYLOAD_TCP = RPCSVC_MAXPAYLOAD,
RPCSVC_MAXPAYLOAD_UDP = 32 * 1024,
};
extern u32 svc_max_payload(const struct svc_rqst *rqstp);
/*
* RPC Call and Reply messages each have their own page array.
* rq_pages holds the incoming Call message; rq_respages holds
* the outgoing Reply message. Both arrays are sized to
* svc_serv_maxpages() entries and are allocated dynamically.
*
* Pages are sent using ->sendmsg with MSG_SPLICE_PAGES so each
* server thread needs to allocate more to replace those used in
* sending.
*
* rq_pages request page contract:
*
* Transport receive paths that move request data pages out of
* rq_pages -- TCP multi-fragment reassembly (svc_tcp_save_pages)
* and RDMA Read I/O (svc_rdma_clear_rqst_pages) -- NULL those
* entries to prevent svc_rqst_release_pages() from freeing pages
* still in transport use, and set rq_pages_nfree to the count.
* svc_alloc_arg() refills only that many rq_pages entries.
*
* For rq_respages, svc_rqst_release_pages() NULLs entries in
* [rq_respages, rq_next_page) after each RPC. svc_alloc_arg()
* refills only that range.
*
* xdr_buf holds responses; the structure fits NFS read responses
* (header, data pages, optional tail) and enables sharing of
* client-side routines.
*
* The xdr_buf.head kvec always points to the first page in the
* rq_*pages list. The xdr_buf.pages pointer points to the second
* page on that list. xdr_buf.tail points to the end of the first
* page. This assumes that the non-page part of an rpc reply will
* fit in a page - NFSd ensures this. lockd also has no trouble.
*/
/**
* svc_serv_maxpages - maximum count of pages needed for one RPC message
* @serv: RPC service context
*
* Returns a count of pages or vectors that can hold the maximum
* size RPC message for @serv.
*
* Each page array can hold at most one payload plus two
* overhead pages (one for the RPC header, one for tail data).
* nfsd_splice_actor() might need an extra page when a READ
* payload is not page-aligned.
*/
static inline unsigned long svc_serv_maxpages(const struct svc_serv *serv)
{
return DIV_ROUND_UP(serv->sv_max_mesg, PAGE_SIZE) + 2 + 1;
}
/*
* The context of a single thread, including the request currently being
* processed.
*
* RPC programs are free to use rq_private to stash thread-local information.
* The sunrpc layer will not access it.
*/
struct svc_rqst {
struct list_head rq_all; /* all threads list */
struct llist_node rq_idle; /* On the idle list */
struct rcu_head rq_rcu_head; /* for RCU deferred kfree */
struct svc_xprt * rq_xprt; /* transport ptr */
struct sockaddr_storage rq_addr; /* peer address */
size_t rq_addrlen;
struct sockaddr_storage rq_daddr; /* dest addr of request
* - reply from here */
size_t rq_daddrlen;
struct svc_serv * rq_server; /* RPC service definition */
struct svc_pool * rq_pool; /* thread pool */
const struct svc_procedure *rq_procinfo;/* procedure info */
struct auth_ops * rq_authop; /* authentication flavour */
struct svc_cred rq_cred; /* auth info */
void * rq_xprt_ctxt; /* transport specific context ptr */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
struct xdr_buf rq_arg;
struct xdr_stream rq_arg_stream;
struct xdr_stream rq_res_stream;
struct folio *rq_scratch_folio;
struct xdr_buf rq_res;
unsigned long rq_maxpages; /* entries per page array */
unsigned long rq_pages_nfree; /* rq_pages entries NULLed by transport */
struct page * *rq_pages; /* Call buffer pages */
struct page * *rq_respages; /* Reply buffer pages */
struct page * *rq_next_page; /* next reply page to use */
struct page * *rq_page_end; /* one past the last reply page */
struct folio_batch rq_fbatch;
struct bio_vec *rq_bvec;
__be32 rq_xid; /* transmission id */
u32 rq_prog; /* program number */
u32 rq_vers; /* program version */
u32 rq_proc; /* procedure number */
u32 rq_prot; /* IP protocol */
unsigned long rq_flags; /* flags field */
ktime_t rq_qtime; /* enqueue time */
void * rq_argp; /* decoded arguments */
void * rq_resp; /* xdr'd results */
__be32 *rq_accept_statp;
void * rq_auth_data; /* flavor-specific data */
__be32 rq_auth_stat; /* authentication status */
int rq_auth_slack; /* extra space xdr code
* should leave in head
* for krb5i, krb5p.
*/
int rq_reserved; /* space on socket outq
* reserved for this request
*/
ktime_t rq_stime; /* start time */
struct cache_req rq_chandle; /* handle passed to caches for
* request delaying
*/
/* Catering to nfsd */
struct auth_domain * rq_client; /* RPC peer info */
struct auth_domain * rq_gssclient; /* "gss/"-style peer info */
struct task_struct *rq_task; /* service thread */
struct net *rq_bc_net; /* pointer to backchannel's
* net namespace
*/
int rq_err; /* Thread sets this to inidicate
* initialisation success.
*/
unsigned long bc_to_initval;
unsigned int bc_to_retries;
unsigned int rq_status_counter; /* RPC processing counter */
void *rq_private; /* For use by the service thread */
};
/* bits for rq_flags */
enum {
RQ_SECURE, /* secure port */
RQ_LOCAL, /* local request */
RQ_USEDEFERRAL, /* use deferral */
RQ_DROPME, /* drop current reply */
RQ_VICTIM, /* Have agreed to shut down */
RQ_DATA
|