Discussion:
How fast can an RPi4B filter traffic?
Ryutaroh Matsumoto
2021-04-20 04:40:01 UTC
Hi,

Last weekend, I replaced my router in my home as the old one
showed hard disk health warnings.

In short, with the /etc/nftables.conf attached below, the RPi4B seems able to
filter packets at about 600 Mbit/s (where M = 1,000,000); at that rate the %irq of "atop" reaches 90%.

Packet processing capability depends on the complexity of tasks.
The router tunnels IPv4 packets over IPv6 by MAP-E (*).
MAP-E consists of two steps:
(1) IPv4 NAPT (Network Address and Port Translation)
(2) Tunneling (e.g. by ip -6 tunnel add $TUNDEV mode ip4ip6 remote $BR local $CE dev $WANDEV encaplimit none)
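
As a rough sketch of how the two steps fit together on the router (the variables
are the ones from the command above; the values here are hypothetical placeholders,
not my actual MAP-E parameters):

#!/bin/sh
# Hypothetical values; substitute the parameters assigned by the ISP.
WANDEV=eth0                # upstream IPv6 interface
BR=2001:db8::1             # MAP-E border relay (tunnel remote)
CE=2001:db8:1234::2        # this router's CE address (tunnel local)
TUNDEV=ip6tnl1             # interface name used by the nftables rules below

# Step (2): IPv4-over-IPv6 tunnel towards the border relay
ip -6 tunnel add $TUNDEV mode ip4ip6 remote $BR local $CE dev $WANDEV encaplimit none
ip link set $TUNDEV up
ip route add default dev $TUNDEV   # route outbound IPv4 through the tunnel
# Step (1), the restricted-port NAPT, is done by the snat rules in /etc/nftables.conf below.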

Because the sets of ports usable for NAPT are discontinuous in MAP-E,
the filtering ruleset is very complex (attached at the bottom; a small
script can generate it, sketched below). Nonetheless, the RPi4B handles
packets at a satisfactory speed, which is a surprise.
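
The 63 nearly identical chains differ only in their port range: 16 consecutive
ports repeating every 1024 ports. A hypothetical generator sketch (the address
and the offset/stride are read off the attached rules, so they are specific to
my MAP-E assignment):

#!/bin/sh
# Hypothetical generator for the map_e_chainN definitions attached below.
ADDR=153.240.174.134                 # MAP-E IPv4 address from the rules
for n in $(seq 1 63); do
  chain=$((n + 1))                   # chains 2..64
  first=$((1024 * n + 704))          # 1728, 2752, ..., 65216
  last=$((first + 15))               # 16 ports per set
  echo "chain map_e_chain$chain {"
  for proto in tcp udp icmp udplite sctp dccp; do
    echo "  meta l4proto $proto counter packets 0 snat to $ADDR:$first-$last persistent;"
  done
  echo "}"
done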

(*) Explanations of MAP-E:
https://en.wikipedia.org/wiki/Mapping_of_Address_and_Port or
https://www.slideshare.net/akiranakagawa3/20150304-apricot2015apnicfukuoka

Best regards, Ryutaroh Matsumoto

/etc/nftables.conf:

#!/usr/sbin/nft -f

flush ruleset

table ip my_notrack {
chain PREROUTING {
type filter hook prerouting priority raw;
ip saddr 192.168.1.0/24 ip daddr 192.168.1.2 notrack
ip saddr 192.168.1.2 ip daddr 192.168.1.0/24 notrack
}
}


table inet map_e_filter {
chain PREROUTING {
type filter hook prerouting priority filter;
iifname ip6tnl1 meta nfproto ipv6 log prefix "Error: ipv6 in tunnel! " flags all counter
}

chain INPUT {
type filter hook input priority filter;

iifname ip6tnl1 meta nfproto ipv6 log prefix "Error: ipv6 in tunnel! " flags all counter
}
chain POSTROUTING {
type filter hook postrouting priority filter;
iifname ip6tnl1 tcp flags & syn == syn tcp option maxseg size set rt mtu counter # log prefix "TCPMSS shortened (input) " level debug flags all
oifname ip6tnl1 tcp flags & syn == syn tcp option maxseg size set rt mtu counter # log prefix "TCPMSS shortened (output) " level debug flags all
}
}

table ip map_e_nat {
map myvmap {
type mark : verdict
elements = { 1 : goto map_e_chain1, 2 : goto map_e_chain2, 3 : goto map_e_chain3, 4 : goto map_e_chain4, 5 : goto map_e_chain5, 6 : goto map_e_chain6, 7 : goto map_e_chain7, 8 : goto map_e_chain8, 9 : goto map_e_chain9, 10 : goto map_e_chain10, 11 : goto map_e_chain11, 12 : goto map_e_chain12, 13 : goto map_e_chain13, 14 : goto map_e_chain14, 15 : goto map_e_chain15, 16 : goto map_e_chain16, 17 : goto map_e_chain17, 18 : goto map_e_chain18, 19 : goto map_e_chain19, 20 : goto map_e_chain20, 21 : goto map_e_chain21, 22 : goto map_e_chain22, 23 : goto map_e_chain23, 24 : goto map_e_chain24, 25 : goto map_e_chain25, 26 : goto map_e_chain26, 27 : goto map_e_chain27, 28 : goto map_e_chain28, 29 : goto map_e_chain29, 30 : goto map_e_chain30, 31 : goto map_e_chain31, 32 : goto map_e_chain32, 33 : goto map_e_chain33, 34 : goto map_e_chain34, 35 : goto map_e_chain35, 36 : goto map_e_chain36, 37 : goto map_e_chain37, 38 : goto map_e_chain38, 39 : goto map_e_chain39, 40 : goto map_e_chain40, 41 : goto map_e_chain41, 42 : goto map_e_chain42, 43 : goto map_e_chain43, 44 : goto map_e_chain44, 45 : goto map_e_chain45, 46 : goto map_e_chain46, 47 : goto map_e_chain47, 48 : goto map_e_chain48, 49 : goto map_e_chain49, 50 : goto map_e_chain50, 51 : goto map_e_chain51, 52 : goto map_e_chain52, 53 : goto map_e_chain53, 54 : goto map_e_chain54, 55 : goto map_e_chain55, 56 : goto map_e_chain56, 57 : goto map_e_chain57, 58 : goto map_e_chain58, 59 : goto map_e_chain59, 60 : goto map_e_chain60, 61 : goto map_e_chain61, 62 : goto map_e_chain62, 63 : goto map_e_chain63, 64 : goto map_e_chain64 }
}

chain POSTROUTING {
type nat hook postrouting priority srcnat;
oifname ip6tnl1 mark set 1 counter packets 0
oifname ip6tnl1 meta l4proto tcp mark set numgen inc mod 61 offset 2 counter packets 0 # Chain64 or 63 will not be used and reserved for server use.
oifname ip6tnl1 meta l4proto udp mark set numgen inc mod 61 offset 2 counter packets 0 # Chain64 or 63 will not be used and reserved for server use.
oifname ip6tnl1 meta l4proto icmp mark set numgen inc mod 61 offset 2 counter packets 0 # Chain64 or 63 will not be used and reserved for server use.
oifname ip6tnl1 meta l4proto udplite mark set numgen inc mod 61 offset 2 counter packets 0 # Chain64 or 63 will not be used and reserved for server use.
oifname ip6tnl1 meta l4proto sctp mark set numgen inc mod 61 offset 2 counter packets 0 # Chain64 or 63 will not be used and reserved for server use.
oifname ip6tnl1 meta l4proto dccp mark set numgen inc mod 61 offset 2 counter packets 0 # Chain64 or 63 will not be used and reserved for server use.
oifname ip6tnl1 meta mark vmap @myvmap
}
chain map_e_chain1 { log prefix "Unknown protocol to ip6tnl " level info flags all counter; }

chain map_e_chain2 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:1728-1743 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:1728-1743 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:1728-1743 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:1728-1743 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:1728-1743 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:1728-1743 persistent; }
chain map_e_chain3 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:2752-2767 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:2752-2767 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:2752-2767 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:2752-2767 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:2752-2767 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:2752-2767 persistent; }
chain map_e_chain4 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:3776-3791 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:3776-3791 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:3776-3791 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:3776-3791 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:3776-3791 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:3776-3791 persistent; }
chain map_e_chain5 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:4800-4815 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:4800-4815 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:4800-4815 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:4800-4815 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:4800-4815 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:4800-4815 persistent; }
chain map_e_chain6 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:5824-5839 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:5824-5839 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:5824-5839 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:5824-5839 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:5824-5839 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:5824-5839 persistent; }
chain map_e_chain7 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:6848-6863 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:6848-6863 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:6848-6863 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:6848-6863 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:6848-6863 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:6848-6863 persistent; }
chain map_e_chain8 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:7872-7887 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:7872-7887 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:7872-7887 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:7872-7887 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:7872-7887 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:7872-7887 persistent; }
chain map_e_chain9 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:8896-8911 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:8896-8911 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:8896-8911 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:8896-8911 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:8896-8911 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:8896-8911 persistent; }
chain map_e_chain10 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:9920-9935 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:9920-9935 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:9920-9935 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:9920-9935 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:9920-9935 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:9920-9935 persistent; }
chain map_e_chain11 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:10944-10959 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:10944-10959 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:10944-10959 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:10944-10959 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:10944-10959 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:10944-10959 persistent; }
chain map_e_chain12 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:11968-11983 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:11968-11983 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:11968-11983 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:11968-11983 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:11968-11983 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:11968-11983 persistent; }
chain map_e_chain13 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:12992-13007 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:12992-13007 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:12992-13007 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:12992-13007 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:12992-13007 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:12992-13007 persistent; }
chain map_e_chain14 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:14016-14031 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:14016-14031 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:14016-14031 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:14016-14031 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:14016-14031 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:14016-14031 persistent; }
chain map_e_chain15 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:15040-15055 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:15040-15055 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:15040-15055 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:15040-15055 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:15040-15055 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:15040-15055 persistent; }
chain map_e_chain16 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:16064-16079 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:16064-16079 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:16064-16079 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:16064-16079 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:16064-16079 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:16064-16079 persistent; }
chain map_e_chain17 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:17088-17103 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:17088-17103 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:17088-17103 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:17088-17103 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:17088-17103 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:17088-17103 persistent; }
chain map_e_chain18 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:18112-18127 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:18112-18127 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:18112-18127 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:18112-18127 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:18112-18127 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:18112-18127 persistent; }
chain map_e_chain19 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:19136-19151 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:19136-19151 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:19136-19151 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:19136-19151 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:19136-19151 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:19136-19151 persistent; }
chain map_e_chain20 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:20160-20175 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:20160-20175 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:20160-20175 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:20160-20175 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:20160-20175 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:20160-20175 persistent; }
chain map_e_chain21 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:21184-21199 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:21184-21199 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:21184-21199 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:21184-21199 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:21184-21199 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:21184-21199 persistent; }
chain map_e_chain22 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:22208-22223 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:22208-22223 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:22208-22223 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:22208-22223 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:22208-22223 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:22208-22223 persistent; }
chain map_e_chain23 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:23232-23247 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:23232-23247 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:23232-23247 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:23232-23247 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:23232-23247 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:23232-23247 persistent; }
chain map_e_chain24 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:24256-24271 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:24256-24271 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:24256-24271 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:24256-24271 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:24256-24271 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:24256-24271 persistent; }
chain map_e_chain25 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:25280-25295 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:25280-25295 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:25280-25295 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:25280-25295 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:25280-25295 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:25280-25295 persistent; }
chain map_e_chain26 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:26304-26319 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:26304-26319 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:26304-26319 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:26304-26319 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:26304-26319 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:26304-26319 persistent; }
chain map_e_chain27 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:27328-27343 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:27328-27343 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:27328-27343 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:27328-27343 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:27328-27343 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:27328-27343 persistent; }
chain map_e_chain28 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:28352-28367 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:28352-28367 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:28352-28367 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:28352-28367 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:28352-28367 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:28352-28367 persistent; }
chain map_e_chain29 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:29376-29391 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:29376-29391 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:29376-29391 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:29376-29391 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:29376-29391 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:29376-29391 persistent; }
chain map_e_chain30 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:30400-30415 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:30400-30415 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:30400-30415 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:30400-30415 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:30400-30415 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:30400-30415 persistent; }
chain map_e_chain31 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:31424-31439 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:31424-31439 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:31424-31439 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:31424-31439 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:31424-31439 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:31424-31439 persistent; }
chain map_e_chain32 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:32448-32463 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:32448-32463 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:32448-32463 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:32448-32463 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:32448-32463 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:32448-32463 persistent; }
chain map_e_chain33 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:33472-33487 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:33472-33487 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:33472-33487 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:33472-33487 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:33472-33487 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:33472-33487 persistent; }
chain map_e_chain34 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:34496-34511 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:34496-34511 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:34496-34511 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:34496-34511 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:34496-34511 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:34496-34511 persistent; }
chain map_e_chain35 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:35520-35535 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:35520-35535 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:35520-35535 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:35520-35535 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:35520-35535 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:35520-35535 persistent; }
chain map_e_chain36 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:36544-36559 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:36544-36559 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:36544-36559 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:36544-36559 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:36544-36559 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:36544-36559 persistent; }
chain map_e_chain37 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:37568-37583 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:37568-37583 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:37568-37583 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:37568-37583 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:37568-37583 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:37568-37583 persistent; }
chain map_e_chain38 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:38592-38607 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:38592-38607 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:38592-38607 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:38592-38607 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:38592-38607 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:38592-38607 persistent; }
chain map_e_chain39 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:39616-39631 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:39616-39631 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:39616-39631 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:39616-39631 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:39616-39631 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:39616-39631 persistent; }
chain map_e_chain40 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:40640-40655 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:40640-40655 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:40640-40655 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:40640-40655 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:40640-40655 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:40640-40655 persistent; }
chain map_e_chain41 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:41664-41679 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:41664-41679 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:41664-41679 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:41664-41679 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:41664-41679 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:41664-41679 persistent; }
chain map_e_chain42 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:42688-42703 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:42688-42703 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:42688-42703 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:42688-42703 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:42688-42703 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:42688-42703 persistent; }
chain map_e_chain43 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:43712-43727 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:43712-43727 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:43712-43727 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:43712-43727 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:43712-43727 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:43712-43727 persistent; }
chain map_e_chain44 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:44736-44751 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:44736-44751 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:44736-44751 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:44736-44751 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:44736-44751 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:44736-44751 persistent; }
chain map_e_chain45 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:45760-45775 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:45760-45775 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:45760-45775 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:45760-45775 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:45760-45775 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:45760-45775 persistent; }
chain map_e_chain46 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:46784-46799 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:46784-46799 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:46784-46799 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:46784-46799 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:46784-46799 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:46784-46799 persistent; }
chain map_e_chain47 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:47808-47823 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:47808-47823 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:47808-47823 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:47808-47823 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:47808-47823 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:47808-47823 persistent; }
chain map_e_chain48 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:48832-48847 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:48832-48847 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:48832-48847 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:48832-48847 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:48832-48847 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:48832-48847 persistent; }
chain map_e_chain49 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:49856-49871 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:49856-49871 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:49856-49871 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:49856-49871 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:49856-49871 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:49856-49871 persistent; }
chain map_e_chain50 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:50880-50895 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:50880-50895 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:50880-50895 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:50880-50895 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:50880-50895 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:50880-50895 persistent; }
chain map_e_chain51 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:51904-51919 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:51904-51919 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:51904-51919 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:51904-51919 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:51904-51919 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:51904-51919 persistent; }
chain map_e_chain52 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:52928-52943 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:52928-52943 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:52928-52943 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:52928-52943 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:52928-52943 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:52928-52943 persistent; }
chain map_e_chain53 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:53952-53967 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:53952-53967 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:53952-53967 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:53952-53967 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:53952-53967 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:53952-53967 persistent; }
chain map_e_chain54 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:54976-54991 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:54976-54991 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:54976-54991 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:54976-54991 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:54976-54991 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:54976-54991 persistent; }
chain map_e_chain55 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:56000-56015 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:56000-56015 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:56000-56015 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:56000-56015 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:56000-56015 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:56000-56015 persistent; }
chain map_e_chain56 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:57024-57039 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:57024-57039 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:57024-57039 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:57024-57039 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:57024-57039 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:57024-57039 persistent; }
chain map_e_chain57 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:58048-58063 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:58048-58063 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:58048-58063 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:58048-58063 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:58048-58063 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:58048-58063 persistent; }
chain map_e_chain58 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:59072-59087 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:59072-59087 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:59072-59087 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:59072-59087 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:59072-59087 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:59072-59087 persistent; }
chain map_e_chain59 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:60096-60111 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:60096-60111 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:60096-60111 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:60096-60111 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:60096-60111 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:60096-60111 persistent; }
chain map_e_chain60 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:61120-61135 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:61120-61135 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:61120-61135 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:61120-61135 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:61120-61135 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:61120-61135 persistent; }
chain map_e_chain61 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:62144-62159 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:62144-62159 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:62144-62159 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:62144-62159 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:62144-62159 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:62144-62159 persistent; }
chain map_e_chain62 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:63168-63183 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:63168-63183 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:63168-63183 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:63168-63183 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:63168-63183 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:63168-63183 persistent; }
chain map_e_chain63 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:64192-64207 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:64192-64207 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:64192-64207 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:64192-64207 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:64192-64207 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:64192-64207 persistent; }
chain map_e_chain64 { meta l4proto tcp counter packets 0 snat to 153.240.174.134:65216-65231 persistent; meta l4proto udp counter packets 0 snat to 153.240.174.134:65216-65231 persistent; meta l4proto icmp counter packets 0 snat to 153.240.174.134:65216-65231 persistent; meta l4proto udplite counter packets 0 snat to 153.240.174.134:65216-65231 persistent; meta l4proto sctp counter packets 0 snat to 153.240.174.134:65216-65231 persistent; meta l4proto dccp counter packets 0 snat to 153.240.174.134:65216-65231 persistent; }
}
Ryutaroh Matsumoto
2021-04-26 07:50:01 UTC
Hi,

For (ARM) SBCs with limited computational power, stripping out
unused features from the kernel sometimes improves the performance,
depending on usage.

For my use case of packet filtering by RPi4B,

CONFIG_PARAVIRT=n
CONFIG_DEBUG_KERNEL=n

each of the above increases the throughput of the packet-filtering router
by about 100 Mbps from the 600 Mbps baseline of linux-image-rt-arm64 5.10.
These options cannot be disabled in the Debian kernel package, which has to
serve wider use cases. The rebuild of linux-image-rt-arm64 was done with
https://github.com/emojifreak/debian-rpi-image-script/blob/main/build-debian-raspi-kernel.sh
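
For reference, the settings of an installed Debian kernel can be checked against
its packaged config file (a small sketch; the path assumes the usual /boot/config-* layout):

grep -E 'CONFIG_(PARAVIRT|DEBUG_KERNEL)[ =]' /boot/config-$(uname -r)
# Enabled options appear as CONFIG_FOO=y, disabled ones as "# CONFIG_FOO is not set".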

On the other hand, I am wondering why the following options are currently
disabled in the Debian arm64 kernel 5.10 package:

CONFIG_CLEANCACHE:
Cleancache can be thought of as a page-granularity victim cache for
clean pages that the kernel's pageframe replacement algorithm (PFRA)
would like to keep around, but can't since there isn't enough
memory. So when the PFRA "evicts" a page, it first attempts to use
cleancache code to put the data contained in that page into
"transcendent memory", memory that is not directly accessible or
addressable by the kernel and is of unknown and possibly time-varying
size. And when a cleancache-enabled filesystem wishes to access a page
in a file on disk, it first checks cleancache to see if it already
contains it; if it does, the page is copied into the kernel and a disk
access is avoided. When a transcendent memory driver is available
(such as zcache or Xen transcendent memory), a significant I/O
reduction may be achieved. When none is available, all cleancache
calls are reduced to a single pointer-compare-against-NULL resulting
in a negligible performance hit.

If unsure, say Y to enable cleancache

This is enabled by other distros:
https://hlandau.github.io/kconfigreport/option/CONFIG_CLEANCACHE.xhtml

CONFIG_ZONE_DEVICE:
Device memory hotplug support allows for establishing pmem, or other
device driver discovered memory regions, in the memmap. This allows
pfn_to_page() lookups of otherwise "device-physical" addresses which
is needed for using a DAX mapping in an O_DIRECT operation, among
other things.

If FS_DAX is enabled, then say Y.

(FS_DAX is enabled in Debian arm64 kernel 5.10 package)

CONFIG_IRQ_TIME_ACCOUNTING:
Select this option to enable fine granularity task irq time
accounting. This is done by reading a timestamp on each transitions
between softirq and hardirq state, so there can be a small performance
impact.

(My observation suggests CONFIG_PARAVIRT=y has much higher overhead.)

If in doubt, say N here.

The above CONFIG_IRQ_TIME_ACCOUNTING enables %hi in "top".
See also "Is Your Linux Version Hiding Interrupt CPU Usage From You?"
https://tanelpoder.com/posts/linux-hiding-interrupt-cpu-usage/
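
With this option enabled, the irq/softirq columns of /proc/stat (which "top"
turns into %hi/%si) actually accumulate; a quick way to watch them per core
during a speed test (field order as documented in proc(5)):

# Columns after the cpu name: user nice system idle iowait irq softirq steal guest guest_nice
watch -n 1 "grep '^cpu[0-9]' /proc/stat"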


CONFIG_PARAVIRT_TIME_ACCOUNTING has a similar role for
linux-image-cloud-arm64:
Select this option to enable fine granularity task steal time
accounting. Time spent executing other tasks in parallel with the
current vCPU is discounted from the vCPU power. To account for that,
there can be a small performance impact.

If in doubt, say N here.

The above enables "%st" in "top". Some other distros seem to enable it:
https://hlandau.github.io/kconfigreport/option/CONFIG_PARAVIRT_TIME_ACCOUNTING.xhtml


Best regards, Ryutaroh Matsumoto
Arnd Bergmann
2021-04-26 09:20:01 UTC
On Mon, Apr 26, 2021 at 9:43 AM Ryutaroh Matsumoto
Post by Ryutaroh Matsumoto
For (ARM) SBCs with limited computational power, stripping out
unused features from the kernel sometimes improves the performance,
depending on usage.
For my use case of packet filtering by RPi4B,
CONFIG_PARAVIRT=n
CONFIG_DEBUG_KERNEL=n
each of the above increases the throughput of the packet-filtering router
by about 100 Mbps from the 600 Mbps baseline of linux-image-rt-arm64 5.10.
These options cannot be disabled in the Debian kernel package, which has to
serve wider use cases. The rebuild of linux-image-rt-arm64 was done with
https://github.com/emojifreak/debian-rpi-image-script/blob/main/build-debian-raspi-kernel.sh
Interesting analysis. I would have expected neither of those two options to
have a measurable effect on network throughput, so it is possible that
these are hitting a bug somewhere that leads to bad performance.

The only effect that CONFIG_PARAVIRT is supposed to have is the steal
time accounting. Incidentally that has just changed to a static_call
in linux-5.13
with commit a0e2bf7cb700 ("x86/paravirt: Switch time pvops functions to
use static_call()") on all architectures, so maybe that also addresses the
problem.

CONFIG_DEBUG_KERNEL by itself does not do anything, but instead it
controls a number of other configuration options. You should be able to
see which options changed by comparing the config file before and after
turning this off.

Generally I think at least CONFIG_DEBUG_INFO should be enabled in
a distro kernel in order to analyse bug reports better, but this is not
supposed to change executable code. What other options are disabled
when you turn this off?
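
For example (a sketch; the file names are placeholders for the two generated configs):

diff <(grep '^CONFIG' config.debug_on) <(grep '^CONFIG' config.debug_off)
# or, from the kernel source tree:
scripts/diffconfig config.debug_on config.debug_off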

Also, do you see the same performance difference with the non-rt kernel?
Most people would not run the -rt kernel because of the inherent
performance overhead, and it's not clear whether the slowdown you
see is the result of a combination of CONFIG_PREEMPT_RT with some
other option, or if this is something that hurts normal users as well.
Post by Ryutaroh Matsumoto
On the other hand, I am wondering why the following options are currently
Cleancache can be thought of as a page-granularity victim cache for
clean pages that the kernel's pageframe replacement algorithm (PFRA)
would like to keep around, but can't since there isn't enough
memory. So when the PFRA "evicts" a page, it first attempts to use
cleancache code to put the data contained in that page into
"transcendent memory", memory that is not directly accessible or
addressable by the kernel and is of unknown and possibly time-varying
size. And when a cleancache-enabled filesystem wishes to access a page
in a file on disk, it first checks cleancache to see if it already
contains it; if it does, the page is copied into the kernel and a disk
access is avoided. When a transcendent memory driver is available
(such as zcache or Xen transcendent memory), a significant I/O
reduction may be achieved. When none is available, all cleancache
calls are reduced to a single pointer-compare-against-NULL resulting
in a negligible performance hit.
If unsure, say Y to enable cleancache
https://hlandau.github.io/kconfigreport/option/CONFIG_CLEANCACHE.xhtml
This seems like a useful thing to enable.
Post by Ryutaroh Matsumoto
Device memory hotplug support allows for establishing pmem, or other
device driver discovered memory regions, in the memmap. This allows
pfn_to_page() lookups of otherwise "device-physical" addresses which
is needed for using a DAX mapping in an O_DIRECT operation, among
other things.
If FS_DAX is enabled, then say Y.
(FS_DAX is enabled in Debian arm64 kernel 5.10 package)
This should probably be an architecture-independent setting.
It does sound useful to enable ZONE_DEVICE and FS_DAX either both together
or not at all. I'm not aware of any arm64 hardware supporting
nvdimm or similar technology that needs these, but there is probably
someone who has it, if only in a lab.
Post by Ryutaroh Matsumoto
Select this option to enable fine granularity task irq time
accounting. This is done by reading a timestamp on each transitions
between softirq and hardirq state, so there can be a small performance
impact.
(My observation suggests CONFIG_PARAVIRT=y has much higher overhead.)
If in doubt, say N here.
The above CONFIG_IRQ_TIME_ACCOUNTING enables %hi in "top".
See also "Is Your Linux Version Hiding Interrupt CPU Usage From You?"
https://tanelpoder.com/posts/linux-hiding-interrupt-cpu-usage/
Indeed, reading the hardware clock on arm64 is usually cheap compared
to other architectures, so enabling this seems reasonable.

Arnd
Ryutaroh Matsumoto
2021-04-27 00:20:01 UTC
Hi Arnd,
Post by Arnd Bergmann
Also, do you see the same performance difference with the non-rt kernel?
Most people would not run the -rt kernel because of the inherent
performance overhead, and it's not clear whether the slowdown you
see is the result of a combination of CONFIG_PREEMPT_RT with some
other option, or if this is something that hurts normal users as well.
Thank you for your interest.
I will check the differences in kernel compile options with the
non-rt kernel (linux-image-arm64).
Hopefully I can report additional information within one week.

Best regards, Ryutaroh
Alan Corey
2021-04-27 11:10:02 UTC
Also look for /proc/config.gz. If you have it, it's a dump of the
config options of the running kernel. Whether it gets generated or not
is itself a config option.
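
For example (assuming CONFIG_IKCONFIG_PROC; when it is built as a module,
the configs module has to be loaded first):

modprobe configs 2>/dev/null   # only needed when CONFIG_IKCONFIG=m
zgrep -E 'CONFIG_(PARAVIRT|DEBUG_KERNEL|IKCONFIG)[ =]' /proc/config.gz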
Post by Ryutaroh Matsumoto
Hi Arnd,
Post by Arnd Bergmann
Also, do you see the same performance difference with the non-rt kernel?
Most people would not run the -rt kernel because of the inherent
performance overhead, and it's not clear whether the slowdown you
see is the result of a combination of CONFIG_PREEMPT_RT with some
other option, or if this is something that hurts normal users as well.
Thank you for your interest.
I will check the differences in kernel compile options with the
non-rt kernel (linux-image-arm64).
Hopefully I can report additional information within one week.
Best regards, Ryutaroh
--
-------------
Education is contagious.
Ryutaroh Matsumoto
2021-04-28 00:00:01 UTC
Hi Alan, thank you for your interest.
Post by Alan Corey
Also look for /proc/config.gz. If you have it, it's a dump of the
config options of the running kernel. Whether it gets generated or not
is itself a config option.
I plan to make minimal changes to the config, rebuilding it as follows:

apt-get source linux/sid
cd linux-5.10.28
fakeroot make -f debian/rules.gen setup_arm64_none_arm64
cat >>debian/build/build_arm64_none_arm64/.config <<'EOF'
CONFIG_XEN=n
CONFIG_PARAVIRT=n
EOF
fakeroot debian/rules source
fakeroot make -j 3 -f debian/rules.gen binary-arch_arm64_none_arm64

I expect there will be no /proc/config.gz, as CONFIG_IKCONFIG is disabled
in the Debian kernel.
I will include a diff -u between the .config in debian/build/build_arm64_none_arm64
and /usr/src/linux-config-5.10/config.arm64_rt_arm64.

As CONFIG_XEN selects CONFIG_PARAVIRT, CONFIG_XEN=n is required
to build a kernel with CONFIG_PARAVIRT=n.

The last build with the above steps failed with ".btf.vmlinux.bin.o: file not recognized: file format not recognized". I am retrying the build after also adding
CONFIG_DEBUG_INFO_BTF=n

As a single build takes 6 hours on the RPi4B, it can take several days to find the
correct build steps. The above steps seem to follow exactly the instructions at

https://www.debian.org/doc/manuals/debian-kernel-handbook/ch-common-tasks.html#s4.2.3
and
https://www.debian.org/doc/manuals/debian-kernel-handbook/ch-common-tasks.html#s4.2.5

Best regards, Ryutaroh
Alan Corey
2021-04-28 01:30:02 UTC
I think you can probably enable CONFIG_IKCONFIG, I'm running a
Bullseye kernel that has a /proc/config.gz. But the kernel did come
from Manjaro I think, it's a little strange. It's on a Pinebook Pro
and there's no official Debian release for it yet, this came from
debootstrap. Getting the drivers and device tree right is a
challenge; a few of the drivers are blobs. Made in China, engineered
in Hong Kong.
Post by Ryutaroh Matsumoto
Hi Alan, thank you for your interest.
Post by Alan Corey
Also look lor /proc/config.gz. If you have it it's a dump of the
config options of the running kernel. Whether it gets generated or not
is itself a config option.
I plan to make minimal changes to the config, rebuilding it as follows:
apt-get source linux/sid
cd linux-5.10.28
fakeroot make -f debian/rules.gen setup_arm64_none_arm64
cat >>debian/build/build_arm64_none_arm64/.config <<'EOF'
CONFIG_XEN=n
CONFIG_PARAVIRT=n
EOF
fakeroot debian/rules source
fakeroot make -j 3 -f debian/rules.gen binary-arch_arm64_none_arm64
I expect there will be no /proc/config.gz, as CONFIG_IKCONFIG is disabled
in the Debian kernel.
I will include a diff -u between the .config in debian/build/build_arm64_none_arm64
and /usr/src/linux-config-5.10/config.arm64_rt_arm64.
As CONFIG_XEN selects CONFIG_PARAVIRT, CONFIG_XEN=n is required
to build a kernel with CONFIG_PARAVIRT=n.
The last build with the above steps failed with ".btf.vmlinux.bin.o: file not
recognized: file format not recognized". I am retrying the build after also
adding
CONFIG_DEBUG_INFO_BTF=n
As a single build takes 6 hours on the RPi4B, it can take several days to find the
correct build steps. The above steps seem to follow exactly the instructions at
https://www.debian.org/doc/manuals/debian-kernel-handbook/ch-common-tasks.html#s4.2.3
and
https://www.debian.org/doc/manuals/debian-kernel-handbook/ch-common-tasks.html#s4.2.5
Best regards, Ryutaroh
--
-------------
Education is contagious.
Ryutaroh Matsumoto
2021-04-28 01:40:01 UTC
Hi Alan,
Post by Alan Corey
I think you can probably enable CONFIG_IKCONFIG, I'm running a
I am pretty sure I can,
as I am using my rebuilt Debian RT kernel with CONFIG_IKCONFIG=m.
I guess that Arnd wants comparison between the original Debian kernel
and a minimally changed kernel (I am not completely sure, of course).

I wonder why the Debian kernel team keeps CONFIG_IKCONFIG
and CONFIG_IKHEADERS disabled...
enabling them would probably make the linux-headers-* and linux-config-*
packages unnecessary.

Best regards, Ryutaroh
Alan Corey
2021-04-28 01:50:01 UTC
The headers wouldn't be unnecessary if you want to build modules for
it, I think. The linux-config package may do the same thing as config.gz.
Post by Ryutaroh Matsumoto
Hi Alan,
Post by Alan Corey
I think you can probably enable CONFIG_IKCONFIG, I'm running a
I am pretty sure I can,
as I am using my rebuilt Debian RT kernel with CONFIG_IKCONFIG=m.
I guess that Arnd wants comparison between the original Debian kernel
and a minimally changed kernel (I am not completely sure, of course).
I wonder why the Debian kernel team keeps CONFIG_IKCONFIG
and CONFIG_IKHEADERS disabled...
enabling them would probably make the linux-headers-* and linux-config-*
packages unnecessary.
Best regards, Ryutaroh
--
-------------
Education is contagious.
Ryutaroh Matsumoto
2021-04-30 02:20:01 UTC
Hi,

This is a followup to my previous post on the impact of kernel compile
options on kernel performance:

Summary:
* CONFIG_PARAVIRT=n has probably no positive impact on either
linux-image-arm64 or linux-image-rt-arm64.

* CONFIG_DEBUG_PREEMPT=n much improves the performance of linux-image-rt-arm64,
while it is unselectable with linux-image-arm64, as CONFIG_DEBUG_PREEMPT
depends on CONFIG_PREEMPTION.

* linux-image-rt-arm64 is much slower than the standard linux-image-arm64,
but its performance probably becomes comparable once unnecessary compile
options for the given hardware are omitted.

* All kernel versions are 5.10.28.

Experiments:
Compile options are adjusted as follows:

apt-get source linux
cd linux-5.10.28
fakeroot make -f debian/rules.gen setup_arm64_none_arm64
cat >>debian/build/build_arm64_none_arm64/.config <<'EOF'
CONFIG_XEN=n
CONFIG_PARAVIRT=n
CONFIG_DEBUG_INFO_BTF=n
EOF
fakeroot debian/rules source
fakeroot make -j 4 -f debian/rules.gen binary-arch_arm64_none_arm64

CONFIG_XEN selects CONFIG_PARAVIRT, so it must be disabled when CONFIG_PARAVIRT=n.
CONFIG_DEBUG_INFO_BTF=y causes a build error with linux-image-arm64.

The job of the RPi4B is taking IPv4 packets, applying NAPT, encapsulating them in IPv6,
and vice versa. Almost no user process is involved; the CPU is mainly in kernel
mode or servicing interrupts. The hard-irq + softirq consumption of a single CPU core
spikes to 85-100% during the speed test.

CPU frequency of RPi4 is set to the lowest (600 MHz) by
cpupower frequency-set -g powersave
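
The governor and the resulting clock can be confirmed via the standard cpufreq sysfs files:

cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_cur_freq   # in kHz, about 600000 here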

IPv6 packets can travel at around 600-800 Mbps. All IPv4 packets are
converted to IPv6 by the RPi4, and no IPv4 packets are exchanged with the ISP;
the ISP's network is essentially IPv6 single-stack.
All devices are wired to a single Ethernet switch.

On another fast amd64 laptop, I run
speedtest -v --selection-details -a -i 192.168.1.72 -s 28910

The observed speeds are shown below:

linux-image-arm64 with no change:
Download: 577.23 Mbps (data used: 370.7 MB)
Upload: 386.99 Mbps (data used: 353.0 MB)
Download: 592.79 Mbps (data used: 1.1 GB)
Upload: 380.41 Mbps (data used: 171.0 MB)


linux-image-arm64 with CONFIG_PARAVIRT=n
Download: 485.35 Mbps (data used: 406.0 MB)
Upload: 380.57 Mbps (data used: 171.5 MB)
Download: 514.57 Mbps (data used: 256.8 MB)
Upload: 376.92 Mbps (data used: 169.2 MB)

linux-image-rt-arm64 with no change:
Download: 380.85 Mbps (data used: 422.2 MB)
Upload: 283.87 Mbps (data used: 127.8 MB)

linux-image-rt-arm64 with CONFIG_PARAVIRT=n
Download: 332.95 Mbps (data used: 265.4 MB)
Upload: 310.06 Mbps (data used: 273.7 MB)
Download: 385.97 Mbps (data used: 400.1 MB)
Upload: 295.57 Mbps (data used: 133.2 MB)
Download: 379.69 Mbps (data used: 394.0 MB)
Upload: 293.07 Mbps (data used: 139.4 MB)

linux-image-rt-arm64 with CONFIG_PARAVIRT=n & CONFIG_DEBUG_PREEMPT=n
Download: 425.95 Mbps (data used: 753.7 MB)
Upload: 347.50 Mbps (data used: 382.8 MB)
Download: 423.05 Mbps (data used: 499.4 MB)
Upload: 332.48 Mbps (data used: 149.4 MB)

RT kernel specialized for RPi:
https://github.com/emojifreak/debian-rpi-image-script/blob/main/build-debian-raspi-kernel.sh

Download: 488.33 Mbps (data used: 514.6 MB)
Upload: 416.72 Mbps (data used: 330.8 MB)
Download: 504.79 Mbps (data used: 633.5 MB)
Upload: 404.07 Mbps (data used: 258.5 MB)

Best regards, Ryutaroh
Arnd Bergmann
2021-04-30 09:50:01 UTC
On Fri, Apr 30, 2021 at 4:10 AM Ryutaroh Matsumoto
Post by Ryutaroh Matsumoto
This is a followup for my previous post of impact on kernel performance
* CONFIG_PARAVIRT=n has probably no positive impact on either
linux-image-arm64 or linux-image-rt-arm64.
Ok
Post by Ryutaroh Matsumoto
* CONFIG_DEBUG_PREEMPT=n much improves the performance of linux-image-rt-arm64,
while it is unselectable with linux-image-arm64, as CONFIG_DEBUG_PREEMPT
depends on CONFIG_PREEMPTION.
* linux-image-rt-arm64 is much slower than the standard linux-image-arm64,
but its performance probably becomes comparable once unnecessary compile
options for the given hardware are omitted.
I would not expect any change in performance from omitting unused drivers.
If turning off the other platforms has a performance impact, this could still
mean that there is a serious performance regression where we do not
expect it.

CONFIG_DEBUG_PREEMPT is a tough choice here: in a distro kernel,
this should probably be enabled since it may find RT-specific bugs in
arbitrary drivers. Generally speaking, PREEMPT_RT is less well tested
than normal kernels, so having this enabled is particularly useful when
running on hardware that nobody else has tried it on before.
The impact of CONFIG_DEBUG_PREEMPT is also higher than I expected
here; it may be worth asking on the linux-rt-users list what the
expected cost on arm64 hardware is.
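
(For reference, one way to flip these options when rebuilding locally is the kernel's own scripts/config helper -- a sketch, assuming you are in the kernel source tree with the distro .config already in place:)

# disable the two options under discussion, then let Kconfig resolve dependencies
./scripts/config --file .config --disable DEBUG_PREEMPT --disable PARAVIRT
make olddefconfig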
Post by Ryutaroh Matsumoto
The job of the RPi4B is to take IPv4 packets, apply NAPT, encapsulate them in IPv6,
and do the reverse for incoming traffic. Almost no user process is involved; the CPU is
mainly in kernel mode or servicing interrupts. The combined hard irq + softirq consumption
of a single CPU core spikes to 85-100% during the speedtest.
This is likely all driver specific, and if you just need to improve network
throughput, tuning or hacking the driver probably makes more difference
than changing kernel configuration options.

If this is the internal network device in the Raspberry Pi 4, I can see
that the platform is not particularly optimized for throughput, even
though the driver doesn't contain any serious blunders.

The first thing I see is that the driver can support 40-bit addressing,
but the platform doesn't declare the bus to be wider than 32 bits,
so it will always use bounce buffers for any address above the first
four gigabytes. Interestingly, the DTB file that comes with Raspbian
does declare a /scb/dma-ranges property for the bus that the ethernet
and PCI controllers are attached to, which would make their kernel much
faster than a mainline kernel!
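
(One way to check what the running kernel actually received -- a sketch; the /scb node path is taken from the paragraph above and the exact device-tree layout may differ:)

# dump the live device tree and look for dma-ranges on the scb bus
dtc -I fs -O dts /proc/device-tree 2>/dev/null | grep -A2 'dma-ranges'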

Another thing I see is that the ethernet device is actually able to
use four separate transmit queues, but it seems they are all
wired up to the same interrupt line. For rx queues, the hardware
does seem to support multiple queues, but the driver doesn't. I doubt that there
is anything you can do about this to make it use multiple CPUs.
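
(This can be inspected from userspace -- a sketch, assuming the interface is eth0; the driver may not support the channel query, and the interrupt names in /proc/interrupts vary by driver:)

# how many hardware queues the driver exposes
ethtool -l eth0
# how the interrupts are (or are not) spread across CPUs
grep -i 'eth0\|genet' /proc/interrupts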

Finally, I see that the TX queue is protected using a spinlock that
prevents the bcmgenet_xmit() function from running concurrently
with the __bcmgenet_tx_reclaim() function, so even when you
call xmit on different CPU cores, it still won't utilize multiple cores
at any time, but rather lead to either spinning (with the normal
kernel) or blocking the thread (on an rt kernel). If the transmit
path can be changed to work without spinlocks, the differences
between rt and non-rt would get smaller for your workload,
and it would probably get faster in both cases.
Post by Ryutaroh Matsumoto
Download: 577.23 Mbps (data used: 370.7 MB)
Upload: 386.99 Mbps (data used: 353.0 MB)
Download: 592.79 Mbps (data used: 1.1 GB)
Upload: 380.41 Mbps (data used: 171.0 MB)
linux-image-arm64 with CONFIG_PARAVIRT=n
Download: 485.35 Mbps (data used: 406.0 MB)
Upload: 380.57 Mbps (data used: 171.5 MB)
Download: 514.57 Mbps (data used: 256.8 MB)
Upload: 376.92 Mbps (data used: 169.2 MB)
Curiously, these numbers suggest that turning off CONFIG_PARAVIRT
actually makes the kernel slower in the non-preempt version, while for
the preempt-rt kernel it does not show that counterintuitive effect.
Can you check whether there are any other differences in the .config
file besides CONFIG_PARAVIRT that may cause the difference, and
that you didn't mix up the results?
Post by Ryutaroh Matsumoto
Download: 380.85 Mbps (data used: 422.2 MB)
Upload: 283.87 Mbps (data used: 127.8 MB)
linux-image-rt-arm64 with CONFIG_PARAVIRT=n
Download: 332.95 Mbps (data used: 265.4 MB)
Upload: 310.06 Mbps (data used: 273.7 MB)
Download: 385.97 Mbps (data used: 400.1 MB)
Upload: 295.57 Mbps (data used: 133.2 MB)
Download: 379.69 Mbps (data used: 394.0 MB)
Upload: 293.07 Mbps (data used: 139.4 MB)
linux-image-rt-arm64 with CONFIG_PARAVIRT=n & CONFIG_DEBUG_PREEMPT=n
Download: 425.95 Mbps (data used: 753.7 MB)
Upload: 347.50 Mbps (data used: 382.8 MB)
Download: 423.05 Mbps (data used: 499.4 MB)
Upload: 332.48 Mbps (data used: 149.4 MB)
Nice!
Post by Ryutaroh Matsumoto
https://github.com/emojifreak/debian-rpi-image-script/blob/main/build-debian-raspi-kernel.sh
Download: 488.33 Mbps (data used: 514.6 MB)
Upload: 416.72 Mbps (data used: 330.8 MB)
Download: 504.79 Mbps (data used: 633.5 MB)
Upload: 404.07 Mbps (data used: 258.5 MB)
I see you do a couple of things in this fragment. One of them is the
CONFIG_BPF_JIT_ALWAYS_ON=y option that might result in
a significant difference if you actually use BPF (otherwise it makes
no difference).

Given that the numbers here are actually higher than the non-RT
kernel numbers, you clearly hit something very interesting here.

I also see that you enable a number of debugging options, including
CONFIG_UBSAN_SANITIZE_ALL=y, which I would expect to make
the kernel significantly slower when turned on. Is this one enabled
in the other kernels as well, or did you find that it has a positive
effect here?

As mentioned above, turning off the unused platforms /should/ not
make a difference other than code size. Do you get different
results if you drop all the CONFIG_ARCH_*=n lines from the
fragment? If you do, I would consider that a problem in the
upstream kernel that needs to be investigated further.
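
(A quick way to test that -- a sketch, assuming the fragment is stored in rpi.fragment inside the kernel source tree:)

# strip the CONFIG_ARCH_* overrides from the fragment, merge, and resolve dependencies
grep -vE '^# CONFIG_ARCH_.* is not set|^CONFIG_ARCH_.*=n' rpi.fragment > rpi-noarch.fragment
scripts/kconfig/merge_config.sh -m .config rpi-noarch.fragment
make olddefconfig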

Arnd
Ryutaroh Matsumoto
2021-05-02 06:30:02 UTC
Permalink
Sorry for the somewhat late response.
Post by Arnd Bergmann
I would not expect any change in performance from omitting unused drivers.
If turning off the other platforms has a performance impact, this could still
mean that there is a serious performance regression where we do not
expect it.
I do not know if you meant CONFIG_ARCH_* by "drivers".
Removal of all CONFIG_ARCH_* other than CONFIG_ARCH_BCM2835 disables
CONFIG_GENERIC_IRQ_MIGRATION=y
CONFIG_GENERIC_IRQ_CHIP=y
CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS=y

CONFIG_NUMA=n & CONFIG_HOTPLUG_CPU=n disable
CONFIG_HAVE_SETUP_PER_CPU_AREA
and CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK,
and enable CONFIG_ARCH_FLATMEM_ENABLE.

Those changes could have some impact...
Post by Arnd Bergmann
The impact of CONFIG_DEBUG_PREEMPT is also higher than I expected
here, it may be worth asking on the linux-rt-users list about what the
expected cost on arm64 hardware is.
I believe they are very well aware of this, see
https://wiki.linuxfoundation.org/realtime/documentation/howto/applications/preemptrt_setup

Their recommendation seems to be CONFIG_DEBUG_PREEMPT=n
for better performance.
Post by Arnd Bergmann
Can you check whether there are any other differences in the .config
file besides CONFIG_PARAVIRT that may cause the difference, and
that you didn't mix up the results?
I believe not.
The difference may come from:
* The number of measurements is too small (only two).
* The measured speed depends on the ISP's IPv6 network, which I cannot keep
constant.
The RPi4B is used for processing real network traffic, and my family complains
if it is down for too long...
Post by Arnd Bergmann
I see you do a couple of things in this fragment. One of them is the
CONFIG_BPF_JIT_ALWAYS_ON=y option that might result in
a significant difference if you actually use BPF (otherwise it makes
no difference).
I believe the measured speed depends on nftables, the IPv4-in-IPv6 tunnel,
the macvlan driver, the Ethernet driver and the general network stack, but not
on BPF.

My network interface configuration is:
ip6tnl1 (tunnel) binds to myve1 (macvlan),
myve1 binds to eth0, and eth0 has absolutely no IPv4 or IPv6 address.
The reason for using macvlan is to allow multiple macvlan and macvtap
interfaces bound to eth0.

"ip l" shows as follows:
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
2: eth0: <BROADCAST,NOARP,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
link/ether dc:a6:32:bb:99:d9 brd ff:ff:ff:ff:ff:ff
3: myve1@eth0: <BROADCAST,MULTICAST,ALLMULTI,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default qlen 1000
link/ether 96:8a:a9:8d:f6:64 brd ff:ff:ff:ff:ff:ff
4: ***@eth0: <BROADCAST,MULTICAST> mtu 1500 qdisc noop state DOWN mode DEFAULT group default qlen 500
link/ether 8e:7e:4b:95:3b:59 brd ff:ff:ff:ff:ff:ff
5: ip6tnl0@NONE: <NOARP> mtu 1452 qdisc noop state DOWN mode DEFAULT group default qlen 1000
link/tunnel6 :: brd :: permaddr 616:be05:411::
6: ip6tnl1@myve1: <POINTOPOINT,NOARP,UP,LOWER_UP> mtu 1460 qdisc noqueue state UNKNOWN mode DEFAULT group default qlen 1000
link/tunnel6 2400:4050:2ba1:ac00:99:f0ae:8600:2c00 peer 2001:380:a120::9 permaddr 9648:2668:3d4f::
7: wlan0: <NO-CARRIER,BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast state DOWN mode DEFAULT group default qlen 1000
link/ether dc:a6:32:bb:99:da brd ff:ff:ff:ff:ff:ff
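
(For completeness, roughly how this topology can be recreated -- a minimal sketch using the names and addresses visible above; the macvlan mode is an assumption:)

# macvlan on top of the physical NIC; eth0 itself carries no addresses
ip link add link eth0 name myve1 type macvlan mode bridge
ip link set dev myve1 up
# IPv4-in-IPv6 (ip4ip6) tunnel bound to the macvlan device
ip -6 tunnel add ip6tnl1 mode ip4ip6 remote 2001:380:a120::9 \
   local 2400:4050:2ba1:ac00:99:f0ae:8600:2c00 dev myve1 encaplimit none
ip link set dev ip6tnl1 mtu 1460 up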
Post by Arnd Bergmann
I also see that you enable a number debugging options, including
CONFIG_UBSAN_SANITIZE_ALL=y, which I would expect to make
the kernel significantly slower when turned on. Is this one enabled
in the other kernels as well, or did you find that it has a positive
effect here?
As far as I can see, CONFIG_UBSAN=y and CONFIG_UBSAN_SANITIZE_ALL=y
have not decreased performance noticeably (for my personal use cases),
so I choose to turn them on whenever I have the chance to build a kernel.
As far as I can recall, the CONFIG_UBSAN-related options did not
degrade YouTube playback in firefox-esr.
For builds of user-space applications, I have not seen a "subjectively noticeable"
performance difference from UBSAN, so I routinely use -fsanitize=undefined.
ASAN and MSAN are terribly slow, as we know well.
Post by Arnd Bergmann
As mentioned above, turning off the unused platforms /should/ not
make a difference other than code size. Do you get different
results if you drop all the CONFIG_ARCH_*=n lines from the
fragment? If you do, I would consider that a problem in the
upstream kernel that needs to be investigated further.
Having a look at arch/arm64/Kconfig.platforms, I see some options
depending on CONFIG_ARCH_*. Besides the ones
mentioned at the beginning, they include
IRQ_DOMAIN_HIERARCHY
ARM_GIC

The *IRQ* and ARM_GIC config options can have some impact on performance
if a use case includes lots of HW interrupts, as mine does.

I am ready to re-build a Debian kernel with only CONFIG_ARCH_*
(except CONFIG_ARCH_BCM2835) disabled.

Best regards, Ryutaroh
Arnd Bergmann
2021-05-03 12:20:01 UTC
Permalink
On Sun, May 2, 2021 at 8:21 AM Ryutaroh Matsumoto
Post by Ryutaroh Matsumoto
Sorry for a bit late response.
Post by Arnd Bergmann
I would not expect any change in performance from omitting unused drivers.
If turning off the other platforms has a performance impact, this could still
mean that there is a serious performance regression where we do not
expect it.
I do not know if you meant CONFIG_ARCH_* by "drivers".
Removal of all CONFIG_ARCH_* other than CONFIG_ARCH_BCM2835 disables
CONFIG_GENERIC_IRQ_MIGRATION=y
CONFIG_GENERIC_IRQ_CHIP=y
CONFIG_IRQ_FASTEOI_HIERARCHY_HANDLERS=y
The way it generally works is that each platform option only allows you to
enable additional platform-specific drivers that don't make sense elsewhere.
E.g. the generic irq chip infrastructure is library code that is used by certain
drivers but not others, so the expectation is that they would not change
the performance. If they do, that may be considered a bug.
Post by Ryutaroh Matsumoto
CONFIG_NUMA=n & CONFIG_HOTPLUG_CPU=n disable
CONFIG_HAVE_SETUP_PER_CPU_AREA
and CONFIG_NEED_PER_CPU_EMBED_FIRST_CHUNK,
and enable CONFIG_ARCH_FLATMEM_ENABLE.
Those changes could have some impact...
These can have a small performance impact, though it should
mostly make NUMA machines worse, rather than making non-NUMA
machines better.
Post by Ryutaroh Matsumoto
Post by Arnd Bergmann
The impact of CONFIG_DEBUG_PREEMPT is also higher than I expected
here, it may be worth asking on the linux-rt-users list about what the
expected cost on arm64 hardware is.
I believe they are very well aware of this, see
https://wiki.linuxfoundation.org/realtime/documentation/howto/applications/preemptrt_setup
Their recommendation seems to be CONFIG_DEBUG_PREEMPT=n
for better performance.
Ok, in that case it might help to change the Kconfig description that
today recommends turning it on:

|config DEBUG_PREEMPT
| bool "Debug preemptible kernel"
| depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT
| default y
| help
| If you say Y here then the kernel will use a debug variant of the
| commonly used smp_processor_id() function and will print warnings
| if kernel code uses it in a preemption-unsafe way. Also, the kernel
| will detect preemption count underflows.

In particular the "default y" makes it sound like this has very little
impact.
Post by Ryutaroh Matsumoto
Post by Arnd Bergmann
Can you check whether there are any other differences in the .config
file besides CONFIG_PARAVIRT that may cause the difference, and
that you didn't mix up the results?
I believe not.
* The number of measurements is too small (only two).
* The measured speed depends on the ISP's IPv6 network, which I cannot keep
constant.
The RPi4B is used for processing real network traffic, and my family complains
if it is down for too long...
Right. In that case, the other numbers are probably also less reliable
than the variance between runs suggests.
Post by Ryutaroh Matsumoto
Post by Arnd Bergmann
I see you do a couple of things in this fragment. One of them is the
CONFIG_BPF_JIT_ALWAYS_ON=y option that might result in
a significant difference if you actually use BPF (otherwise it makes
no difference).
I believe the measured speed depends on nftables, the IPv4-in-IPv6 tunnel,
the macvlan driver, the Ethernet driver and the general network stack, but not
on BPF.
Ok.
Post by Ryutaroh Matsumoto
ip6tnl1 (tunnel) binds to myve1 (macvlan),
myve1 binds to eth0, and eth0 has absolutely no IPv4 or IPv6 address.
The reason for using macvlan is to allow multiple macvlan and macvtap
interfaces bound to eth0.
Ok. FWIW, this driver also lacks support for IFF_UNICAST_FLT,
which means using macvlan/macvtap puts the device into
promiscuous mode, and every frame on the wire will have to
be processed coming into the device.
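
(This is easy to confirm from userspace -- a quick check, assuming the interface is eth0:)

# promiscuity > 0 means the NIC is receiving every frame on the wire
ip -d link show dev eth0 | grep promiscuity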
Post by Ryutaroh Matsumoto
Post by Arnd Bergmann
I also see that you enable a number debugging options, including
CONFIG_UBSAN_SANITIZE_ALL=y, which I would expect to make
the kernel significantly slower when turned on. Is this one enabled
in the other kernels as well, or did you find that it has a positive
effect here?
As far as I can see, CONFIG_UBSAN=y and CONFIG_UBSAN_SANITIZE_ALL=y
have not decreased performance noticeably (for my personal use cases),
so I choose to turn them on whenever I have the chance to build a kernel.
As far as I can recall, the CONFIG_UBSAN-related options did not
degrade YouTube playback in firefox-esr.
For builds of user-space applications, I have not seen a "subjectively noticeable"
performance difference from UBSAN, so I routinely use -fsanitize=undefined.
ASAN and MSAN are terribly slow, as we know well.
The overhead of ubsan is very workload-specific; I've seen other cases
in which it matters a lot.
Post by Ryutaroh Matsumoto
Post by Arnd Bergmann
As mentioned above, turning off the unused platforms /should/ not
make a difference other than code size. Do you get different
results if you drop all the CONFIG_ARCH_*=n lines from the
fragment? If you do, I would consider that a problem in the
upstream kernel that needs to be investigated further.
Having a look at arch/arm64/Kconfig.platforms, I see some options
depending on CONFIG_ARCH_*. Besides the ones
mentioned at the beginning, they include
IRQ_DOMAIN_HIERARCHY
ARM_GIC
The *IRQ* and ARM_GIC config options can have some impact on performance
if a use case includes lots of HW interrupts, as mine does.
I am ready to re-build a Debian kernel with only CONFIG_ARCH_*
(except CONFIG_ARCH_BCM2835) disabled.
Ok. As I said, I don't think the IRQ options would matter here, but
turning off PREEMPT_RT should help a lot if there are too many
interrupts. More importantly, you can play around with changing
the IRQ coalescing numbers for 'ethtool -C' (if the driver supports
that). Setting the coalescing options higher generally improves
throughput because more work can be done per interrupt, but
setting it too high can add latency from buffer bloat.
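
(What that looks like in practice -- a sketch; the values are arbitrary examples and the genet driver may only support a subset of these parameters:)

# show the current interrupt coalescing settings
ethtool -c eth0
# handle more packets per interrupt (example values, tune to taste)
ethtool -C eth0 rx-usecs 100 rx-frames 64
ethtool -C eth0 tx-usecs 100 tx-frames 64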

Arnd
