且构网

分享程序员开发的那些事...
且构网 - 分享程序员编程开发的那些事

0503linux内核网络参数测试tcp_keepalive

更新时间:2022-08-27 21:23:12

[20170503]linux内核网络参数测试tcp_keepalive.txt

# echo /proc/sys/net/ipv4/tcp_keepalive* | xargs   -n 1  strings -1 -f
/proc/sys/net/ipv4/tcp_keepalive_intvl: 75
/proc/sys/net/ipv4/tcp_keepalive_probes: 9
/proc/sys/net/ipv4/tcp_keepalive_time: 7200

参数解析:
/proc/sys/net/ipv4/tcp_keepalive_time    当keepalive起用的时候,TCP发送keepalive消息的频度。默认是2小时。
/proc/sys/net/ipv4/tcp_keepalive_intvl   当探测没有确认时,keepalive探测包的发送间隔。缺省是75秒。
/proc/sys/net/ipv4/tcp_keepalive_probes  如果对方不予应答,keepalive探测包的发送次数。缺省值是9。

1.测试说明:
client_ip=192.168.101.6
servier_ip=192.168.31.8

2.测试:
--//首先在在服务端执行如下:
# tcpdump -i eth0  host 192.168.101.6 and not port 22 and port 1521 -nnn

--//(注***在tmux下运行,这样即使断开网络,程序也一直在后台运行),如果没有tmux,可以使用screen,或者使用nohup 命令,把结果输出到
--//文本文件中.然后拔掉client端的网线(有点粗暴,等下午观察)
--//测试很简单启动sqlplus连接服务器192.168.31.8(client ip=192.168.101.6)

# tcpdump -i eth0  host 192.168.101.6 and not port 22 and port 1521 -nnn
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 96 bytes
11:45:35.556171 IP 192.168.101.6.61511 > 192.168.31.8.1521: P 266196060:266196323(263) ack 266559111 win 16087
11:45:35.556545 IP 192.168.31.8.1521 > 192.168.101.6.61511: P 1:269(268) ack 263 win 277
11:45:35.557302 IP 192.168.101.6.61511 > 192.168.31.8.1521: P 263:284(21) ack 269 win 16425
11:45:35.557389 IP 192.168.31.8.1521 > 192.168.101.6.61511: P 269:371(102) ack 284 win 277
11:45:35.756763 IP 192.168.101.6.61511 > 192.168.31.8.1521: . ack 371 win 16399
--//client_ip=192.168.101.6 servier_ip=192.168.31.8
13:45:35.622771 IP 192.168.31.8.1521 > 192.168.101.6.61511: . ack 284 win 277    <== 对比前面的时间正好2个小时.
13:46:50.622714 IP 192.168.31.8.1521 > 192.168.101.6.61511: . ack 284 win 277    <== 间隔75秒
13:48:05.622669 IP 192.168.31.8.1521 > 192.168.101.6.61511: . ack 284 win 277    <== 间隔75秒
13:49:20.622606 IP 192.168.31.8.1521 > 192.168.101.6.61511: . ack 284 win 277    <== 间隔75秒
13:50:35.622581 IP 192.168.31.8.1521 > 192.168.101.6.61511: . ack 284 win 277    <== 间隔75秒
13:51:50.622536 IP 192.168.31.8.1521 > 192.168.101.6.61511: . ack 284 win 277    <== 间隔75秒
13:53:05.624812 IP 192.168.31.8.1521 > 192.168.101.6.61511: . ack 284 win 277    <== 间隔75秒
13:54:20.629661 IP 192.168.31.8.1521 > 192.168.101.6.61511: . ack 284 win 277    <== 间隔75秒
13:55:35.630374 IP 192.168.31.8.1521 > 192.168.101.6.61511: . ack 284 win 277    <== 间隔75秒
13:56:50.634701 IP 192.168.31.8.1521 > 192.168.101.6.61511: R 371:371(0) ack 284 win 277  <== 间隔75秒

--//后面间隔75秒,次数正好9次,充分验证前面参数tcp_keepalive*的设置.忘记观察是否这是连接已经断开了.

--//也可以修改/etc/sysctl.conf参数文件:
net.ipv4.tcp_keepalive_time = 200
net.ipv4.tcp_keepalive_intvl = 10
net.ipv4.tcp_keepalive_probes = 4

--//执行sysctl -p 生效.这是我在另外机器的测试:(注这次没有断开网络).

# tcpdump -i eth2  host 192.168.101.6 and not port 22 -nnn
tcpdump: WARNING: eth2: no IPv4 address assigned
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth2, link-type EN10MB (Ethernet), capture size 65535 bytes
15:56:54.895543 IP 192.168.101.6.54835 > 192.168.90.16.1521: Flags [P.], seq 915087626:915087889, ack 2674786075, win 16087, length 263
15:56:54.901859 IP 192.168.90.16.1521 > 192.168.101.6.54835: Flags [P.], seq 1:1275, ack 263, win 345, length 1274
15:56:54.903137 IP 192.168.101.6.54835 > 192.168.90.16.1521: Flags [P.], seq 263:284, ack 1275, win 16425, length 21
15:56:54.903262 IP 192.168.90.16.1521 > 192.168.101.6.54835: Flags [P.], seq 1275:1377, ack 284, win 345, length 102
15:56:55.094738 IP 192.168.101.6.54835 > 192.168.90.16.1521: Flags [.], ack 1377, win 16399, length 0

16:00:15.094297 IP 192.168.90.16.1521 > 192.168.101.6.54835: Flags [.], ack 284, win 345, length 0
16:00:15.100102 IP 192.168.101.6.54835 > 192.168.90.16.1521: Flags [.], ack 1377, win 16399, length 0
16:03:35.100284 IP 192.168.90.16.1521 > 192.168.101.6.54835: Flags [.], ack 284, win 345, length 0
16:03:35.101480 IP 192.168.101.6.54835 > 192.168.90.16.1521: Flags [.], ack 1377, win 16399, length 0

--//3*60+20=200

16:06:55.101658 IP 192.168.90.16.1521 > 192.168.101.6.54835: Flags [.], ack 284, win 345, length 0
16:06:55.102206 IP 192.168.101.6.54835 > 192.168.90.16.1521: Flags [.], ack 1377, win 16399, length 0

--!!以后都是200秒.从服务器发起连接监测时候网络正常.
--//当然也可以修改sqlnet.ora文件,加入sqlnet.expire_time=1,明天继续测试2种混合在一起的情况.

3.测试2者混合在一起的情况:
client_ip=192.168.101.6
servier_ip=192.168.31.8

--//也可以修改/etc/sysctl.conf参数文件:
net.ipv4.tcp_keepalive_time = 200
net.ipv4.tcp_keepalive_intvl = 10
net.ipv4.tcp_keepalive_probes = 4

# echo /proc/sys/net/ipv4/tcp_keepalive* | xargs   -n 1  strings -1 -f
/proc/sys/net/ipv4/tcp_keepalive_intvl: 10
/proc/sys/net/ipv4/tcp_keepalive_probes: 4
/proc/sys/net/ipv4/tcp_keepalive_time: 200

$ cat sqlnet.ora
DIAG_ADR_ENABLED = OFF
sqlnet.expire_time=1
SQLNET.INBOUND_CONNECT_TIMEOUT =0

--//重启监听略:
$ lsntctl stop;sleep 1 ; lsnrctl start

# tcpdump -i eth0  host 192.168.101.6 and not port 22 and port 1521 -nnn
09:03:25.054059 IP 192.168.31.8.1521 > 192.168.101.6.56298: P 5832:5849(17) ack 6956 win 254
09:03:25.054381 IP 192.168.101.6.56298 > 192.168.31.8.1521: P 6956:6969(13) ack 5849 win 16091
09:03:25.054463 IP 192.168.31.8.1521 > 192.168.101.6.56298: P 5849:5866(17) ack 6969 win 254
09:03:25.256361 IP 192.168.31.8.1521 > 192.168.101.6.56298: P 5849:5866(17) ack 6969 win 254
09:03:25.256685 IP 192.168.101.6.56298 > 192.168.31.8.1521: . ack 5866 win 16087 <nop,nop,sack 1 {5849:5866}>
..
09:05:25.034910 IP 192.168.31.8.1521 > 192.168.101.6.56298: P 5866:5876(10) ack 6969 win 254
09:05:25.228989 IP 192.168.101.6.56298 > 192.168.31.8.1521: . ack 5876 win 16085
09:06:25.047668 IP 192.168.31.8.1521 > 192.168.101.6.56298: P 5876:5886(10) ack 6969 win 254
09:06:25.250725 IP 192.168.101.6.56298 > 192.168.31.8.1521: . ack 5886 win 16082
09:07:25.059427 IP 192.168.31.8.1521 > 192.168.101.6.56298: P 5886:5896(10) ack 6969 win 254
09:07:25.260497 IP 192.168.101.6.56298 > 192.168.31.8.1521: . ack 5896 win 16080

--//可以发现起作用是sqlnet.expire_time=1,可能是设置sqlnet.expire_time=1时间太短.
--//注开始发起第1个包时间间隔是sqlnet.expire_time的2倍,也就是2分钟.以后才是1分钟间隔.
--//修改net.ipv4.tcp_keepalive_time = 20测试.

# echo /proc/sys/net/ipv4/tcp_keepalive* | xargs   -n 1  strings -1 -f
/proc/sys/net/ipv4/tcp_keepalive_intvl: 10
/proc/sys/net/ipv4/tcp_keepalive_probes: 4
/proc/sys/net/ipv4/tcp_keepalive_time: 20

--//sqlplus重新连接测试....
# tcpdump -i eth0  host 192.168.101.6 and not port 22 and port 1521 -nnn
09:11:33.105651 IP 192.168.31.8.1521 > 192.168.101.6.57471: P 5832:5849(17) ack 6955 win 277
09:11:33.106017 IP 192.168.101.6.57471 > 192.168.31.8.1521: P 6955:6968(13) ack 5849 win 16091
09:11:33.106101 IP 192.168.31.8.1521 > 192.168.101.6.57471: P 5849:5866(17) ack 6968 win 277
09:11:33.301576 IP 192.168.101.6.57471 > 192.168.31.8.1521: . ack 5866 win 16087
...
09:13:33.077109 IP 192.168.31.8.1521 > 192.168.101.6.57471: P 5866:5876(10) ack 6968 win 277
09:13:33.279214 IP 192.168.101.6.57471 > 192.168.31.8.1521: . ack 5876 win 16085
09:14:33.088855 IP 192.168.31.8.1521 > 192.168.101.6.57471: P 5876:5886(10) ack 6968 win 277
09:14:33.299118 IP 192.168.101.6.57471 > 192.168.31.8.1521: . ack 5886 win 16082
--//可以发现起作用是sqlnet.expire_time=1,linux内核参数不起作用.

4.还是取消sqlnet.expire_time=1看看.
--//注原来的会话一定要退出.不然设置sqlnet.expire_time=1依旧起作用.

# tcpdump -i eth0  host 192.168.101.6 and not port 22 and port 1521 -nnn
09:16:45.369765 IP 192.168.101.6.58289 > 192.168.31.8.1521: P 6955:6968(13) ack 5849 win 16091
09:16:45.369854 IP 192.168.31.8.1521 > 192.168.101.6.58289: P 5849:5866(17) ack 6968 win 254
09:16:45.565878 IP 192.168.101.6.58289 > 192.168.31.8.1521: . ack 5866 win 16087
..
09:17:05.566768 IP 192.168.31.8.1521 > 192.168.101.6.58289: . ack 6968 win 254
09:17:05.567087 IP 192.168.101.6.58289 > 192.168.31.8.1521: . ack 5866 win 16087
09:17:25.567023 IP 192.168.31.8.1521 > 192.168.101.6.58289: . ack 6968 win 254
09:17:25.567339 IP 192.168.101.6.58289 > 192.168.31.8.1521: . ack 5866 win 16087
09:17:45.567278 IP 192.168.31.8.1521 > 192.168.101.6.58289: . ack 6968 win 254
09:17:45.567595 IP 192.168.101.6.58289 > 192.168.31.8.1521: . ack 5866 win 16087
09:18:05.567537 IP 192.168.31.8.1521 > 192.168.101.6.58289: . ack 6968 win 254
09:18:05.567859 IP 192.168.101.6.58289 > 192.168.31.8.1521: . ack 5866 win 16087

--//间隔20秒监测1次.
--//从测试可以发现如果2个都设置,起作用的是sqlnet.expire_time,估计设置很大没意义.(不会大于设置120分钟吧).