In our test environment with two NICs bonded, the KNI interface leaks memory: after dpvs has been running for a while it reports "KNI: Out of memory". The leak occurs even when the KNI interface only carries BGP route-announcement traffic, just more slowly; if health-check traffic also goes through the KNI interface, the leak is much faster and the out-of-memory error shows up within one or two hours.
Once dpvs.log keeps printing "KNI: Out of memory", the IP address on the KNI interface can no longer communicate, because every rx/tx packet on the KNI interface is dropped.

DPDK version: stable-17.11.2
DPVS version: 1.7.8
One-arm mode, with two NICs bonded
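To narrow the problem down, it can help to watch the usage counters of the mbuf pool that backs the KNI interface and confirm it is the one being exhausted, rather than the main netif pktpool. Below is a minimal sketch, assuming per-socket pool names `kni_mbuf_pool_0` and `mbuf_pool_0` (the exact names depend on the dpvs version; `rte_mempool_walk()` can list them). `rte_mempool_lookup()`, `rte_mempool_avail_count()` and `rte_mempool_in_use_count()` are standard DPDK APIs available in 17.11.

```c
#include <stdio.h>
#include <rte_eal.h>
#include <rte_mempool.h>

/*
 * Minimal sketch: dump the usage counters of a mempool by name so a leak
 * can be watched over time. Run inside the dpvs process, or attach to it
 * as a DPDK secondary process (--proc-type=secondary, plus the same
 * --file-prefix if one is used).
 *
 * NOTE: the pool names below are assumptions; check the names actually
 * created by your dpvs build, e.g. with rte_mempool_walk().
 */
static void dump_pool_usage(const char *name)
{
    struct rte_mempool *mp = rte_mempool_lookup(name);

    if (mp == NULL) {
        printf("mempool '%s' not found\n", name);
        return;
    }

    printf("%s: size=%u in_use=%u avail=%u\n",
           name, mp->size,
           rte_mempool_in_use_count(mp),
           rte_mempool_avail_count(mp));
}

int main(int argc, char **argv)
{
    if (rte_eal_init(argc, argv) < 0)
        return -1;

    dump_pool_usage("kni_mbuf_pool_0");  /* assumed KNI mbuf pool name    */
    dump_pool_usage("mbuf_pool_0");      /* assumed netif pktpool name    */
    return 0;
}
```

Running this periodically shows whether `in_use` of the KNI pool keeps growing while the netif pktpool stays stable, which would point at leaked KNI mbufs.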

Could you please provide the outputs of the following commands from the problem environment?
- ip link show
- ip addr show
- ip route show
- dpip link show
- dpip addr show
- dpip route show
The dpvs configuration of the problem environment:
```
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! This is dpvs default configuration file.
!
! The attribute "" denotes the configuration item at initialization stage. Item of
! this type is configured oneshoot and not reloadable. If invalid value configured in the
! file, dpvs would use its default value.
!
! Note that dpvs configuration file supports the following comment type:
! * line comment: using '#' or '!'
! * inline range comment: using '<' and '>', put comment in between
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! global config
global_defs {
log_level DEBUG
log_file /data/var/log/dpvs.log
}
! netif config
netif_defs {
pktpool_size 4194296
pktpool_cache 512
device dpdk0 {
rx {
queue_number 16
descriptor_number 2048
rss all
}
tx {
queue_number 16
descriptor_number 2048
}
fdir {
mode perfect
pballoc 128k
status matched
}
! promisc_mode
! kni_name dpdk0.kni
}
device dpdk1 {
rx {
queue_number 16
descriptor_number 2048
rss all
}
tx {
queue_number 16
descriptor_number 2048
}
fdir {
mode perfect
pballoc 128k
status matched
}
! promisc_mode
! kni_name dpdk1.kni
}
bonding bond0 {
mode 4
slave dpdk0
slave dpdk1
primary dpdk0
kni_name bond0.kni
}
}
! worker config (lcores)
worker_defs {
worker cpu0 {
type master
cpu_id 0
}
worker cpu1 {
type slave
cpu_id 1
port bond0 {
rx_queue_ids 0
tx_queue_ids 0
! isol_rx_cpu_ids 9
! isol_rxq_ring_sz 1048576
}
}
worker cpu2 {
type slave
cpu_id 2
port bond0 {
rx_queue_ids 1
tx_queue_ids 1
! isol_rx_cpu_ids 10
! isol_rxq_ring_sz 1048576
}
}
worker cpu3 {
type slave
cpu_id 3
port bond0 {
rx_queue_ids 2
tx_queue_ids 2
! isol_rx_cpu_ids 11
! isol_rxq_ring_sz 1048576
}
}
worker cpu4 {
type slave
cpu_id 4
port bond0 {
rx_queue_ids 3
tx_queue_ids 3
! isol_rx_cpu_ids 12
! isol_rxq_ring_sz 1048576
}
}
worker cpu5 {
type slave
cpu_id 5
port bond0 {
rx_queue_ids 4
tx_queue_ids 4
! isol_rx_cpu_ids 13
! isol_rxq_ring_sz 1048576
}
}
worker cpu6 {
type slave
cpu_id 6
port bond0 {
rx_queue_ids 5
tx_queue_ids 5
! isol_rx_cpu_ids 14
! isol_rxq_ring_sz 1048576
}
}
worker cpu7 {
type slave
cpu_id 7
port bond0 {
rx_queue_ids 6
tx_queue_ids 6
! isol_rx_cpu_ids 15
! isol_rxq_ring_sz 1048576
}
}
worker cpu8 {
type slave
cpu_id 8
port bond0 {
rx_queue_ids 7
tx_queue_ids 7
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu9 {
type slave
cpu_id 9
port bond0 {
rx_queue_ids 8
tx_queue_ids 8
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu10 {
type slave
cpu_id 10
port bond0 {
rx_queue_ids 9
tx_queue_ids 9
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu11 {
type slave
cpu_id 11
port bond0 {
rx_queue_ids 10
tx_queue_ids 10
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu12 {
type slave
cpu_id 12
port bond0 {
rx_queue_ids 11
tx_queue_ids 11
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu13 {
type slave
cpu_id 13
port bond0 {
rx_queue_ids 12
tx_queue_ids 12
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu14 {
type slave
cpu_id 14
port bond0 {
rx_queue_ids 13
tx_queue_ids 13
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu15 {
type slave
cpu_id 15
port bond0 {
rx_queue_ids 14
tx_queue_ids 14
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
worker cpu16 {
type slave
cpu_id 16
port bond0 {
rx_queue_ids 15
tx_queue_ids 15
! isol_rx_cpu_ids 16
! isol_rxq_ring_sz 1048576
}
}
}
! timer config
timer_defs {
# cpu job loops to schedule dpdk timer management
schedule_interval 500
}
! dpvs neighbor config
neigh_defs {
unres_queue_length 128
timeout 60
}
! dpvs ipv4 config
ipv4_defs {
forwarding off
default_ttl 64
fragment {
bucket_number 4096
bucket_entries 16
max_entries 4096
ttl 1
}
}
! dpvs ipv6 config
ipv6_defs {
disable off
forwarding off
route6 {
method "hlist"
recycle_time 10
}
}
! control plane config
ctrl_defs {
lcore_msg {
ring_size 4096
multicast_queue_length 256
sync_msg_timeout_us 20000
}
ipc_msg {
unix_domain /var/run/dpvs_ctrl
}
}
! ipvs config
ipvs_defs {
conn {
conn_pool_size 33554432
conn_pool_cache 512
conn_init_timeout 3
! expire_quiescent_template
! fast_xmit_close
! redirect off
}
udp {
! defence_udp_drop
uoa_mode ipo <opp for private protocol by default, or ipo for IP-option mode>
uoa_max_trail 0 <max trails for send UOA for a connection>
timeout {
normal 300
last 3
}
}
tcp {
! defence_tcp_drop
timeout {
none 2
established 7200
syn_sent 3
syn_recv 30
fin_wait 7
time_wait 7
close 3
close_wait 7
last_ack 7
listen 120
synack 30
last 2
}
synproxy {
synack_options {
mss 1380
ttl 63
sack
! wscale
! timestamp
}
! defer_rs_syn
rs_syn_max_retry 3
ack_storm_thresh 10
max_ack_saved 3
conn_reuse_state {
close
time_wait
! fin_wait
! close_wait
! last_ack
}
}
}
}
! sa_pool config
sa_pool {
pool_hash_size 8
}
```
Which DPDK version were you using when the KNI OOM occurred? Has the problem been resolved?
Please take a look at this DPDK bug report: https://bugs.dpdk.org/show_bug.cgi?id=213. You will also need to increase the size of the KNI memory pool.
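For context: in dpvs the KNI devices use a dedicated mbuf pool, created per NUMA socket in `src/kni.c` via `rte_pktmbuf_pool_create()`; it is separate from the `pktpool_size` configured under `netif_defs`, so enlarging it means changing the source and rebuilding dpvs. The sketch below shows what such a change could look like. The macro names `KNI_MBUFPOOL_ELEMS` / `KNI_MBUFPOOL_CACHE_SIZE`, the pool name format and the helper function are assumptions; check your own `src/kni.c` for the actual identifiers. `rte_pktmbuf_pool_create()` itself is the standard DPDK API.

```c
/* Sketch of the per-socket KNI mbuf pool setup (identifiers are assumed;
 * verify against your dpvs source tree). To mitigate "KNI: Out of memory",
 * the element count can be raised, e.g. from 65535 to 262143; mempools are
 * most memory-efficient when the count is a power of two minus one. */

#include <stdio.h>
#include <rte_config.h>
#include <rte_errno.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

#define KNI_MBUFPOOL_ELEMS       262143  /* enlarged from e.g. 65535 */
#define KNI_MBUFPOOL_CACHE_SIZE  256

static struct rte_mempool *kni_mbuf_pool[RTE_MAX_NUMA_NODES];

static int kni_mbufpool_init(int socket_id)
{
    char poolname[32];

    snprintf(poolname, sizeof(poolname), "kni_mbuf_pool_%d", socket_id);

    /* rte_pktmbuf_pool_create(name, n, cache_size, priv_size,
     *                         data_room_size, socket_id) */
    kni_mbuf_pool[socket_id] = rte_pktmbuf_pool_create(poolname,
            KNI_MBUFPOOL_ELEMS, KNI_MBUFPOOL_CACHE_SIZE, 0,
            RTE_MBUF_DEFAULT_BUF_SIZE, socket_id);

    if (kni_mbuf_pool[socket_id] == NULL)
        return -rte_errno;

    return 0;
}
```

Note that if the root cause is the mbuf leak discussed in the linked DPDK bug report, a larger pool only postpones the error; the underlying leak still needs to be addressed.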