aboutsummaryrefslogtreecommitdiff
path: root/tools/testing
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2026-01-20 18:10:04 -0800
committerJakub Kicinski <kuba@kernel.org>2026-01-20 18:10:04 -0800
commit677a51790be9fe1c843885e6d0c613a50f1de1c0 (patch)
tree4db0ec5119d55672689b0d7762e8cf90994e425f /tools/testing
parent8766d61a1d33cb5f15bfdd6ce9832bbe1fc649c2 (diff)
parentd1de61db1536727c1cad049c09decff22e8b6dd7 (diff)
Merge tag 'net-queue-rx-buf-len-v9' of https://github.com/isilence/linux
Pavel Begunkov says: ==================== Add support for providers with large rx buffer Many modern NICs support configurable receive buffer lengths, and zcrx and memory providers can use buffers larger than 4K to improve performance. When paired with hw-gro larger rx buffer sizes can drastically reduce the number of buffers traversing the stack and save a lot of processing time. It also allows to give to users larger contiguous chunks of data. Single stream benchmarks showed up to ~30% CPU util improvement. E.g. comparison for 4K vs 32K buffers using a 200Gbit NIC: packets=23987040 (MB=2745098), rps=199559 (MB/s=22837) CPU %usr %nice %sys %iowait %irq %soft %idle 0 1.53 0.00 27.78 2.72 1.31 66.45 0.22 packets=24078368 (MB=2755550), rps=200319 (MB/s=22924) CPU %usr %nice %sys %iowait %irq %soft %idle 0 0.69 0.00 8.26 31.65 1.83 57.00 0.57 This series adds net infrastructure for memory providers configuring the size and implements it for bnxt. It's an opt-in feature for drivers, they should advertise support for the parameter in the qops and must check if the hardware supports the given size. It's limited to memory providers as it drastically simplifies implementation. It doesn't affect the fast path zcrx uAPI, and the user exposed parameter is defined in zcrx terms, which allows it to be flexible and adjusted in the future. A liburing example can be found at [2] full branch: [1] https://github.com/isilence/linux.git zcrx/large-buffers-v8 Liburing example: [2] https://github.com/isilence/liburing.git zcrx/rx-buf-len * tag 'net-queue-rx-buf-len-v9' of https://github.com/isilence/linux: io_uring/zcrx: document area chunking parameter selftests: iou-zcrx: test large chunk sizes eth: bnxt: support qcfg provided rx page size eth: bnxt: adjust the fill level of agg queues with larger buffers eth: bnxt: store rx buffer size per queue net: pass queue rx page size from memory provider net: add bare bone queue configs net: reduce indent of struct netdev_queue_mgmt_ops members net: memzero mp params when closing a queue ==================== Link: https://patch.msgid.link/ Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c72
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py39
2 files changed, 99 insertions, 12 deletions
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
index 62456df947bc..240d13dbc54e 100644
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -12,6 +12,7 @@
#include <unistd.h>
#include <arpa/inet.h>
+#include <linux/mman.h>
#include <linux/errqueue.h>
#include <linux/if_packet.h>
#include <linux/ipv6.h>
@@ -37,6 +38,23 @@
#include <liburing.h>
+#define SKIP_CODE 42
+
+struct t_io_uring_zcrx_ifq_reg {
+ __u32 if_idx;
+ __u32 if_rxq;
+ __u32 rq_entries;
+ __u32 flags;
+
+ __u64 area_ptr; /* pointer to struct io_uring_zcrx_area_reg */
+ __u64 region_ptr; /* struct io_uring_region_desc * */
+
+ struct io_uring_zcrx_offsets offsets;
+ __u32 zcrx_id;
+ __u32 rx_buf_len;
+ __u64 __resv[3];
+};
+
static long page_size;
#define AREA_SIZE (8192 * page_size)
#define SEND_SIZE (512 * 4096)
@@ -65,6 +83,8 @@ static bool cfg_oneshot;
static int cfg_oneshot_recvs;
static int cfg_send_size = SEND_SIZE;
static struct sockaddr_in6 cfg_addr;
+static unsigned int cfg_rx_buf_len;
+static bool cfg_dry_run;
static char *payload;
static void *area_ptr;
@@ -128,14 +148,28 @@ static void setup_zcrx(struct io_uring *ring)
if (!ifindex)
error(1, 0, "bad interface name: %s", cfg_ifname);
- area_ptr = mmap(NULL,
- AREA_SIZE,
- PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE,
- 0,
- 0);
- if (area_ptr == MAP_FAILED)
- error(1, 0, "mmap(): zero copy area");
+ if (cfg_rx_buf_len && cfg_rx_buf_len != page_size) {
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE |
+ MAP_HUGETLB | MAP_HUGE_2MB,
+ -1,
+ 0);
+ if (area_ptr == MAP_FAILED) {
+ printf("Can't allocate huge pages\n");
+ exit(SKIP_CODE);
+ }
+ } else {
+ area_ptr = mmap(NULL,
+ AREA_SIZE,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE,
+ 0,
+ 0);
+ if (area_ptr == MAP_FAILED)
+ error(1, 0, "mmap(): zero copy area");
+ }
ring_size = get_refill_ring_size(rq_entries);
ring_ptr = mmap(NULL,
@@ -157,17 +191,23 @@ static void setup_zcrx(struct io_uring *ring)
.flags = 0,
};
- struct io_uring_zcrx_ifq_reg reg = {
+ struct t_io_uring_zcrx_ifq_reg reg = {
.if_idx = ifindex,
.if_rxq = cfg_queue_id,
.rq_entries = rq_entries,
.area_ptr = (__u64)(unsigned long)&area_reg,
.region_ptr = (__u64)(unsigned long)&region_reg,
+ .rx_buf_len = cfg_rx_buf_len,
};
- ret = io_uring_register_ifq(ring, &reg);
- if (ret)
+ ret = io_uring_register_ifq(ring, (void *)&reg);
+ if (cfg_rx_buf_len && (ret == -EINVAL || ret == -EOPNOTSUPP ||
+ ret == -ERANGE)) {
+ printf("Large chunks are not supported %i\n", ret);
+ exit(SKIP_CODE);
+ } else if (ret) {
error(1, 0, "io_uring_register_ifq(): %d", ret);
+ }
rq_ring.khead = (unsigned int *)((char *)ring_ptr + reg.offsets.head);
rq_ring.ktail = (unsigned int *)((char *)ring_ptr + reg.offsets.tail);
@@ -323,6 +363,8 @@ static void run_server(void)
io_uring_queue_init(512, &ring, flags);
setup_zcrx(&ring);
+ if (cfg_dry_run)
+ return;
add_accept(&ring, fd);
@@ -383,7 +425,7 @@ static void parse_opts(int argc, char **argv)
usage(argv[0]);
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:")) != -1) {
+ while ((c = getopt(argc, argv, "sch:p:l:i:q:o:z:x:d")) != -1) {
switch (c) {
case 's':
if (cfg_client)
@@ -418,6 +460,12 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_send_size = strtoul(optarg, NULL, 0);
break;
+ case 'x':
+ cfg_rx_buf_len = page_size * strtoul(optarg, NULL, 0);
+ break;
+ case 'd':
+ cfg_dry_run = true;
+ break;
}
}
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 2c5acfb4f5dc..c63d6d6450d2 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -8,6 +8,7 @@ from lib.py import NetDrvEpEnv
from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
from lib.py import EthtoolFamily
+SKIP_CODE = 42
def create_rss_ctx(cfg):
output = ethtool(f"-X {cfg.ifname} context new start {cfg.target} equal 1").stdout
@@ -114,6 +115,44 @@ def test_zcrx_oneshot(cfg, setup) -> None:
cmd(tx_cmd, host=cfg.remote)
+def test_zcrx_large_chunks(cfg) -> None:
+ """Test zcrx with large buffer chunks."""
+
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on")
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto")
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0")
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}")
+
+ ethtool(f"-G {cfg.ifname} rx 64")
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}")
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}")
+
+ rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -x 2"
+ tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840"
+
+ probe = cmd(rx_cmd + " -d", fail=False)
+ if probe.ret == SKIP_CODE:
+ raise KsftSkipEx(probe.stdout)
+
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(port, proto="tcp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
def main() -> None:
with NetDrvEpEnv(__file__) as cfg:
cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../drivers/net/hw/iou-zcrx")