dpvs
dpvs copied to clipboard
Intel(R) 82599 problems keepalive&dpvs DPDK NETIF: Ethdev port_id=0 invalid rss_hf: 0x3afbc, valid value: 0x38d34 NETIF: Ethdev port_id=0 invalid tx_offload: 0x1000e, valid value: 0x2a03f
DPVS version: v1.9.4
DPDK version: 20.11.1
OS version: oracle8
Kernel: 4.18.0-513.9.1.el8_9.x86_64
Keepalived (built with dpdk & dpvs): Keepalived v2.0.19 (unknown)
I have an odd problem with the following Ethernet controller: 41:00.0 Ethernet controller: Intel(R) 82599 10 Gigabit Dual Port Network Connection (rev 01)
When I start DPDK, it gives me the following warnings: DPDK NETIF: Ethdev port_id=0 invalid rss_hf: 0x3afbc, valid value: 0x38d34 NETIF: Ethdev port_id=0 invalid tx_offload: 0x1000e, valid value: 0x2a03f
It still seems to be working properly, and I can see the kni interface up with the configuration shared below.
The problem is that when I start keepalived and when I run
dpip addr add 192.168.4.2/16 dev dpdk0
ifconfig dpdk0.kni:1000 192.168.4.2 netmask 255.255.0.0
the dpdk0 interface gets killed by keepalived. What could the problem be? I tried the same scenario with an 82599ES NIC without any problem, but with the 82599 I ran into these problems. Any suggestions?
`!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! This is dpvs default configuration file.
!
! The attribute "<init>" denotes the configuration item at initialization stage.
! global config
global_defs {
log_level WARNING
log_file /var/log/dpvs.log
! log_async_mode on
! kni on
}
! netif config
netif_defs {
<init> device dpdk0 {
rx {
queue_number 11
descriptor_number 1024
rss all
}
tx {
queue_number 11
descriptor_number 1024
}
! mtu 1500
! promisc_mode
kni_name dpdk0.kni
}
}
! worker config (lcores)
worker_defs {
<init> worker cpu1 {
type slave
cpu_id 1
port dpdk0 {
rx_queue_ids 0
tx_queue_ids 0
! isol_rx_cpu_ids 9
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu2 {
type slave
cpu_id 2
port dpdk0 {
rx_queue_ids 1
tx_queue_ids 1
! isol_rx_cpu_ids 10
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu3 {
type slave
cpu_id 3
port dpdk0 {
rx_queue_ids 2
tx_queue_ids 2
! isol_rx_cpu_ids 11
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu4 {
type slave
cpu_id 4
port dpdk0 {
rx_queue_ids 3
tx_queue_ids 3
! isol_rx_cpu_ids 12
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu5 {
type slave
cpu_id 5
port dpdk0 {
rx_queue_ids 4
tx_queue_ids 4
! isol_rx_cpu_ids 13
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu6 {
type slave
cpu_id 6
port dpdk0 {
rx_queue_ids 5
tx_queue_ids 5
! isol_rx_cpu_ids 14
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu7 {
type slave
cpu_id 7
port dpdk0 {
rx_queue_ids 6
tx_queue_ids 6
! isol_rx_cpu_ids 15
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu8 {
type slave
cpu_id 8
port dpdk0 {
rx_queue_ids 7
tx_queue_ids 7
! isol_rx_cpu_ids 15
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu9 {
type slave
cpu_id 9
port dpdk0 {
rx_queue_ids 8
tx_queue_ids 8
! isol_rx_cpu_ids 15
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu10 {
type slave
cpu_id 10
port dpdk0 {
rx_queue_ids 9
tx_queue_ids 9
! isol_rx_cpu_ids 15
! isol_rxq_ring_sz 1048576
}
}
<init> worker cpu11 {
type slave
cpu_id 11
port dpdk0 {
rx_queue_ids 10
tx_queue_ids 10
! isol_rx_cpu_ids 15
! isol_rxq_ring_sz 1048576
}
}
!<init> worker cpu17 {
! type kni
! cpu_id 17
! port dpdk0 {
! rx_queue_ids 8
! tx_queue_ids 8
! }
!}
}
! timer config
timer_defs {
# cpu job loops to schedule dpdk timer management
schedule_interval 500
}
! dpvs neighbor config
neigh_defs {
! dpvs ipset config
ipset_defs {
! dpvs ipv4 config
ipv4_defs {
forwarding off
! dpvs ipv6 config
ipv6_defs {
disable off
forwarding off
route6 {
! control plane config
ctrl_defs {
lcore_msg {
! ipvs config
ipvs_defs {
conn {
udp {
! defence_udp_drop
uoa_mode opp
uoa_max_trail 3
timeout {
normal 300
last 3
}
}
tcp {
! defence_tcp_drop
timeout {
none 2
established 90
syn_sent 3
syn_recv 30
fin_wait 7
time_wait 7
close 3
close_wait 7
last_ack 7
listen 120
synack 30
last 2
}
synproxy {
synack_options {
mss 1452
ttl 63
sack
! wscale
! timestamp
}
close_client_window
! defer_rs_syn
rs_syn_max_retry 3
ack_storm_thresh 10
max_ack_saved 3
conn_reuse_state {
close
time_wait
! fin_wait
! close_wait
! last_ack
}
}
}
}
! sa_pool config
sa_pool {
pool_hash_size 16
flow_enable off
}`
It looks like this is not related to the Ethernet card model. I tried another device with a different hardware configuration: with dpdk and keepalived enabled, dpdk shuts itself down while an IP is being bound to the kni interfaces.
The invalid rss_hf logging doesn't matter. Actually, it's just a hint that the network device does not support all the features DPVS expects. It may cause traffic imbalance across NIC rx queues, but it hardly influences dpvs functionality.
OK, it seems rss_hf and tx_offload are not related to my case.
Currently dpdk is running without problems, but once keepalived starts, whenever we bind an IP address to, for example, dpdk.kni:1001, keepalived somehow crashes dpdk. Has anyone faced this problem?
11 [Wed Dec 13 11:05:33 2023] rte_kni: Creating kni...
[Wed Dec 13 11:05:52 2023] IPv6: ADDRCONF(NETDEV_UP): dpdk0.kni: link is not ready
[Wed Dec 13 11:05:52 2023] IPv6: ADDRCONF(NETDEV_CHANGE): dpdk0.kni: link becomes ready
[Wed Dec 13 11:09:21 2023] traps: lcore-worker-6[29560] general protection fault ip:94e52a sp:7f566f7f9ef0 error:0
[Wed Dec 13 11:09:21 2023] traps: lcore-worker-4[29558] general protection fault ip:94e52a sp:7f5674eb6ef0 error:0
[Wed Dec 13 11:09:21 2023] traps: lcore-worker-5[29559] general protection fault ip:94e52a sp:7f566fffaef0 error:0
[Wed Dec 13 11:09:21 2023] traps: lcore-worker-7[29561] general protection fault ip:94e52a sp:7f566eff8ef0 error:0 in dpvs[400000+eb1000]
[Wed Dec 13 11:09:21 2023] in dpvs[400000+eb1000]
[Wed Dec 13 11:09:21 2023] in dpvs[400000+eb1000]
[Wed Dec 13 11:09:21 2023] in dpvs[400000+eb1000]
[Wed Dec 13 11:09:21 2023] traps: lcore-worker-3[29557] general protection fault ip:94e52a sp:7f56756b7ef0 error:0
[Wed Dec 13 11:09:24 2023] rte_kni: kni_net_process_request: wait_event_interruptible timeout
You may get a coredump stack to find out the exact location where dpvs crashed. Before doing so, please enable the debug config and build dpvs with debug information.
sed -i 's/CONFIG_DEBUG=n/CONFIG_DEBUG=y/' config.mk
Thank you so much. After many tests I can say that there are two different hardware setups. The first is without NUMA support (single CPU) and works properly with dpvs 1.9.4 + dpdk 20.11.1 + keepalived. The second is NUMA 2 (dual CPU); dpvs 1.9.4 + dpdk 20.11.1 + keepalived builds properly there, but whenever keepalived runs and binds an IP address to the interface, dpdk crashes or keepalived does not work properly.
I will try to get a coredump and post an update here.
Also, I followed these steps for installing DPVS:
kernel-4.18.0-513.9.1.el8_9.x86_64
Red Hat Enterprise Linux release 8.7 (Ootpa)
NIC TESTED
81:00.0 Ethernet controller: Intel Corporation 82599ES 10-Gigabit SFI/SFP+ Network Connection (rev 01)
CPU TESTED
Intel(R) Xeon(R) CPU E5-2699 v4 @ 2.20GHz ## NUMA2
BIOS NUMA ENABLED / HYPER THREAD OFF
yum -y install kernel-4.18.0-513.9.1.el8_9.x86_64
yum -y install kernel-devel-4.18.0-513.9.1.el8_9.x86_64
yum -y install kernel-headers-4.18.0-513.9.1.el8_9.x86_64
grubby --set-default "/boot/vmlinuz-4.18.0-513.9.1.el8_9.x86_64"
reboot
systemctl stop NetworkManager
yum -y group install "Development Tools"
yum -y install tar wget patch epel-release pciutils net-tools popt-devel vim numactl numactl-devel
pip3 install ninja
pip3 install meson
export PATH="/usr/local/bin/:$PATH"
cd /opt
wget https://pkg-config.freedesktop.org/releases/pkg-config-0.29.2.tar.gz
tar -zxvf pkg-config-0.29.2.tar.gz
cd pkg-config-0.29.2/
./configure --with-internal-glib
make
make install
\cp pkg-config /usr/bin/pkg-config
\cp pkg-config /usr/local/bin/pkg-config
cd /opt
wget https://github.com/iqiyi/dpvs/archive/refs/tags/v1.9.4.tar.gz
tar xvf v1.9.4.tar.gz
mv dpvs-1.9.4 dpdk_load_balancer
cd dpdk_load_balancer/
wget https://fast.dpdk.org/rel/dpdk-20.11.1.tar.xz
tar xf dpdk-20.11.1.tar.xz
cp patch/dpdk-stable-20.11.1/*.patch dpdk-stable-20.11.1/
cd dpdk-stable-20.11.1/
patch -p1 < 0001-kni-use-netlink-event-for-multicast-driver-part.patch
patch -p1 < 0002-pdump-change-dpdk-pdump-tool-for-dpvs.patch
patch -p1 < 0003-debug-enable-dpdk-eal-memory-debug.patch
patch -p1 < 0004-ixgbe_flow-patch-ixgbe-fdir-rte_flow-for-dpvs.patch
patch -p1 < 0005-bonding-allow-slaves-from-different-numa-nodes.patch
patch -p1 < 0006-bonding-fix-bonding-mode-4-problems.patch
mkdir dpdklib
mkdir dpdkbuild
meson -Denable_kmods=true -Dprefix=/opt/dpdk_load_balancer/dpdk-stable-20.11.1/dpdklib dpdkbuild
ninja -C dpdkbuild
cd dpdkbuild; ninja install
export PKG_CONFIG_PATH=/opt/dpdk_load_balancer/dpdk-stable-20.11.1/dpdklib/lib64/pkgconfig/
export LIBDPDKPC_PATH=/opt/dpdk_load_balancer/dpdk-stable-20.11.1/dpdklib/lib64/pkgconfig/libdpdk.pc
echo 4096 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
echo 4096 > /sys/devices/system/node/node1/hugepages/hugepages-2048kB/nr_hugepages
mkdir /mnt/huge
mount -t hugetlbfs nodev /mnt/huge
modprobe uio_pci_generic
cd ..
insmod /opt/dpdk_load_balancer/dpdk-stable-20.11.1/dpdkbuild/kernel/linux/kni/rte_kni.ko carrier=on
/opt/dpdk_load_balancer/dpdk-stable-20.11.1/usertools/dpdk-devbind.py --status
ifconfig eth4 down
/opt/dpdk_load_balancer/dpdk-stable-20.11.1/usertools/dpdk-devbind.py -b uio_pci_generic 0000:81:00.0
cd ..
make
make install
cp /opt/dpdk_load_balancer/conf/dpvs.conf.single-nic.sample /etc/dpvs.conf
According to the messages from wechat, your problem is similar to #896. Remove the __rte_cache_aligned
in struct definition of neighbour_entry
(about line65 in include/neigh.h), and then retry.
53 struct neighbour_entry {
54 int af;
55 struct list_head neigh_list;
56 union inet_addr ip_addr;
...
...
64 uint8_t flag;
65 } __rte_cache_aligned;
Besides, DPVS is built for 2 NUMA nodes by default. If you want to run DPVS on single-NUMA hardware, set
CONFIG_DPVS_MAX_SOCKET=1
in config.mk
, and then build dpvs. I haven't tested dpvs on single-node hardware before. Hope it works.
Hello. First of all, while building DPVS I get a warning related to VRRP. Could it cause a problem?
make[4]: Leaving directory '/opt/dpvs/tools/keepalived/keepalived/core'
Making all in vrrp
make[4]: Entering directory '/opt/dpvs/tools/keepalived/keepalived/vrrp'
CC vrrp_daemon.o
vrrp_daemon.c: In function ‘start_vrrp’:
vrrp_daemon.c:527:4: warning: implicit declaration of function ‘dpvs_sockopt_init’; did you mean ‘dpvs_ctrl_init’? [-Wimplicit-function-declaration]
dpvs_sockopt_init();
^~~~~~~~~~~~~~~~~
dpvs_ctrl_init
CC vrrp_print.o
CC vrrp_data.o
CC vrrp_parser.o
CC vrrp.o
CC vrrp_notify.o
CC vrrp_scheduler.o
CC vrrp_sync.o
CC vrrp_arp.o
CC vrrp_if.o
CC vrrp_track.o
CC vrrp_ipaddress.o
CC vrrp_ndisc.o
CC vrrp_if_config.o
CC vrrp_static_track.o
CC vrrp_vmac.o
CC vrrp_ipsecah.o
CC vrrp_iproute.o
CC vrrp_iprule.o
CC vrrp_ip_rule_route_parser.o
CC vrrp_firewall.o
CC vrrp_iptables.o
CC vrrp_iptables_cmd.o
AR libvrrp.a
make[4]: Leaving directory '/opt/dpvs/tools/keepalived/keepalived/vrrp'
Also, I started dpvs under gdb and I am getting the following error: Thread 8 "lcore-worker-5" received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7fffee7fc400 (LWP 26659)] 0x000000000096c2fa in neigh_add_table ()
Kni: dev dpdk0 link mcast: Kni: new [00] 33:33:00:00:00:01 Kni: new [01] 01:00:5e:00:00:01 Kni: new [02] 33:33:ff:1d:91:38 Kni: new [03] 01:00:5e:00:00:12 Kni: old [00] 33:33:00:00:00:01 Kni: old [01] 33:33:00:00:00:02 Kni: old [02] 01:00:5e:00:00:01 Kni: old [03] 33:33:ff:1d:91:38 Kni: kni_mc_list_cmp_set: add mc addr: 01:00:5e:00:00:12 dpdk0 OK Kni: kni_mc_list_cmp_set: del mc addr: 33:33:00:00:00:02 dpdk0 OK Kni: update maddr of dpdk0 OK!
Thread 8 "lcore-worker-5" received signal SIGSEGV, Segmentation fault. [Switching to Thread 0x7fffee7fc400 (LWP 26659)] 0x000000000096c2fa in neigh_add_table () Missing separate debuginfos, use: yum debuginfo-install bzip2-libs-1.0.6-26.el8.x86_64 elfutils-libelf-0.189-3.el8.x86_64 glibc-2.28-236.0.1.el8.7.x86_64 libzstd-1.4.4-1.0.1.el8.x86_64 numactl-libs-2.0.12-13.el8.x86_64 openssl-libs-1.1.1k-7.el8_6.x86_64 xz-libs-5.2.4-4.el8_6.x86_64 zlib-1.2.11-20.el8.x86_64
Then I removed __rte_cache_aligned and rebuilt; currently, with gdb, I am not seeing any errors. It looks like it is resolved.
I figured out that with KERNEL_VERSION="4.18.0-513.9.1.el8_9.x86_64" DPDK_VERSION="20.11.1" DPVS_VERSION="1.9.4", if we use PKG_CONFIG_VERSION="0.29.2", we have problems.
With pkg-config 1.4.2, it worked properly without problems.
Removing __rte_cache_aligned and using the right pkg-config version are the keys while building. Thank you, solved.