// SPDX-License-Identifier: GPL-2.0-or-later
/*
* ip_vs_est.c: simple rate estimator for IPVS
*
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
* Network namespace (netns) aware.
* Global data moved to netns, i.e. struct netns_ipvs.
* Affected data: est_list and est_lock.
* estimation_timer() runs with one timer per netns.
* get_stats() does the per-CPU summing.
*/
#define pr_fmt(fmt) "IPVS: " fmt
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/types.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/list.h>
#include <linux/rcupdate_wait.h>
#include <net/ip_vs.h>
/*
This code estimates the rate over a short interval (such as 8
seconds) for virtual services and real servers. To measure the rate over
a long interval, it is easy to implement a user-level daemon which
periodically reads those statistical counters and computes the rate.
We measure rate during the last 8 seconds every 2 seconds:
avgrate = avgrate*(1-W) + rate*W
where W = 2^(-2)
NOTES.
* Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
* Netlink users can see 64-bit values but sockopt users are restricted
to 32-bit values for conns, packets, bps, cps and pps.
* A lot of code is taken from net/core/gen_estimator.c
KEY POINTS:
- cpustats counters are updated per-cpu in SoftIRQ context with BH disabled
- kthreads read the cpustats to update the estimators (svcs, dests, total)
- the states of estimators can be read (get stats) or modified (zero stats)
from processes
KTHREADS:
- estimators are added initially to est_temp_list and later kthread 0
distributes them to one or many kthreads for estimation
- kthread contexts are created and attached to array
- the kthread tasks are started when first service is added, before that
the total stats are not estimated
- when configuration (cpulist/nice) is changed, the tasks are restarted
by work (est_reload_work)
- kthread tasks are stopped while the cpulist is empty
- the kthread context holds lists with estimators (chains) which are
processed every 2 seconds
- as estimators can be added dynamically and in bursts, we try to spread
them to multiple chains which are estimated at different time
- on start, kthread 0 enters calculation phase to determine the chain limits
and the limit of estimators per kthread
- est_add_ktid: ktid where to add new ests, can point to empty slot where
we should add kt data
*/
/*
 * File-local lock class key.
 * NOTE(review): presumably passed to lockdep when an estimator-related
 * lock is initialised; the use site is outside this view - confirm.
 */
static struct lock_class_key __ipvs_est_key;
/*
 * Forward declarations of file-local helpers that take the per-netns IPVS
 * state, so they can be referenced before their definitions.
 */
static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs);
static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs);
/*
 * Advance the rate estimates of every estimator linked on @chain.
 *
 * For each estimator, the per-CPU counters of the enclosing ip_vs_stats are
 * summed over all possible CPUs. Then, with s->lock held, the totals are
 * stored in s->kstats and each averaged rate is moved one step towards the
 * rate observed since the previous pass:
 *
 *	avg += (rate - avg) >> 2
 *
 * The walk stops early once kthread_should_stop() returns true.
 */
static void ip_vs_chain_estimation(struct hlist_head *chain)
{
	struct ip_vs_estimator *e;

	hlist_for_each_entry_rcu(e, chain, list) {
		u64 sum_conns = 0, sum_inpkts = 0, sum_outpkts = 0;
		u64 sum_inbytes = 0, sum_outbytes = 0;
		struct ip_vs_stats *s;
		u64 rate;
		int cpu;

		if (kthread_should_stop())
			break;

		/* The estimator is embedded in its ip_vs_stats as ->est */
		s = container_of(e, struct ip_vs_stats, est);

		for_each_possible_cpu(cpu) {
			struct ip_vs_cpu_stats *c = per_cpu_ptr(s->cpustats, cpu);
			u64 conns, inpkts, outpkts, inbytes, outbytes;
			unsigned int seq;

			/* Re-read this CPU's counters while a retry is requested */
			do {
				seq = u64_stats_fetch_begin(&c->syncp);
				conns = u64_stats_read(&c->cnt.conns);
				inpkts = u64_stats_read(&c->cnt.inpkts);
				outpkts = u64_stats_read(&c->cnt.outpkts);
				inbytes = u64_stats_read(&c->cnt.inbytes);
				outbytes = u64_stats_read(&c->cnt.outbytes);
			} while (u64_stats_fetch_retry(&c->syncp, seq));

			sum_conns += conns;
			sum_inpkts += inpkts;
			sum_outpkts += outpkts;
			sum_inbytes += inbytes;
			sum_outbytes += outbytes;
		}

		spin_lock(&s->lock);

		/* Store the freshly summed totals */
		s->kstats.conns = sum_conns;
		s->kstats.inpkts = sum_inpkts;
		s->kstats.outpkts = sum_outpkts;
		s->kstats.inbytes = sum_inbytes;
		s->kstats.outbytes = sum_outbytes;

		/*
		 * Connection and packet rates: scaled by 2^10 and divided by
		 * the 2 second period, hence the shift by 9.
		 */
		rate = (sum_conns - e->last_conns) << 9;
		e->last_conns = sum_conns;
		e->cps += ((s64)rate - (s64)e->cps) >> 2;

		rate = (sum_inpkts - e->last_inpkts) << 9;
		e->last_inpkts = sum_inpkts;
		e->inpps += ((s64)rate - (s64)e->inpps) >> 2;

		rate = (sum_outpkts - e->last_outpkts) << 9;
		e->last_outpkts = sum_outpkts;
		e->outpps += ((s64)rate - (s64)e->outpps) >> 2;

		/*
		 * Byte rates: scaled by 2^5 and divided by the 2 second
		 * period, hence the shift by 4.
		 */
		rate = (sum_inbytes - e->last_inbytes) << 4;
		e->last_inbytes = sum_inbytes;
		e->inbps += ((s64)rate - (s64)e->inbps) >> 2;

		rate = (sum_outbytes - e->last_outbytes) << 4;
		e->last_outbytes = sum_outbytes;
		e->outbps += ((s64)rate - (s64)e->outbps) >> 2;

		spin_unlock(&s->lock);
	}
}
static void ip_vs_tick_estimation(struct