更新时间:2022-10-14 09:11:30
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
1.cdn nginx httpcode分析脚本
http://caiguangguang.blog.51cto.com/1652935/1371902
id_list_success = [ 200 , 206 , 300 , 301 , 302 , 303 , 304 , 305 , 306 , 307 ]
#是源里面更新,cdn代码 flow = 0 # 这些状态码的流量
flow1 = 0 #总流量
flow_ppsucai = 0 #对应域名的流量
count = 0 #总行数
count_sucai = 0
count_sucai_100 = 0
count_sucai_30_100 = 0
count_sucai_30 = 0
三种状态 sum_time = 0.0
统计所需要时间 count_success = count_200 + count_300
response_time = round (sum_time / count_success, 2 )
所有时间 response_time_source = round (sum_time_source / count_success, 2 )
200 响应时间
count_200_backup = 0
count_not_200_backup = 0
if web_code not in id_list_200 and backup_server not in server_list:
#print web_code, backup_server
count_not_200_backup + = 1
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
|
2. 根据现有的域名监控
参考《zabbix应用之nginx统一监控》,监控项:accepts、handled、requests、active、reading、writing、waiting。cat nginx_site_discovery.conf: UserParameter=nginxSiteDiscovery,bash /usr/local/zabbix/bin/nginx_monitor.sh nginxSiteDiscovery
UserParameter=getNginxStatus[*],bash /usr/local/zabbix/bin/nginx_monitor.sh getNginxStatus "$1" "$2"
cat /usr/local/zabbix/bin/nginx_monitor.sh
#!/bin/bash
#
# Filename:    nginx_monitor.sh
# Revision:    1.0
# Date:        2014/09/24
# Author:      Qicheng
# Email:
# Website:     http://qicheng0211.blog.51cto.com
# Description: unified nginx monitoring script for Zabbix
# Notes:
#   Usage: nginx_monitor.sh nginxSiteDiscovery
#          nginx_monitor.sh getNginxStatus <zabbix-host-name> <site-url>
#

# Path of the local zabbix agent configuration file; adjust to your install.
AGENT_CONF="/usr/local/zabbix/etc/zabbix_agentd.conf"
# Path of the file that lists the nginx sites.
NGINX_SITE_CONF="/usr/local/zabbix/scripts/nginx_site.conf"
# Path of zabbix_sender.
ZBX_SENDER="/usr/local/bin/zabbix_sender"

FUNCTION=$1
HOST_NAME=$2
NGINX_SITE=$3
CURL="/usr/bin/curl"
TIMEOUT=30

#######################################
# nginx site low-level discovery: print every site found in NGINX_SITE_CONF
# as a Zabbix low-level-discovery JSON document.
# Globals:  NGINX_SITE_CONF (read)
# Outputs:  {"data":[{"{#NGINX_SITE}":"<site>"},...]} on stdout
#######################################
function nginxSiteDiscovery() {
    local -a sites=()
    local site idx last
    # Lines starting with '#' are skipped; every whitespace-separated word of
    # the remaining lines is one site. awk does the splitting so that a '?' or
    # '*' inside a URL is never glob-expanded by the shell.
    while IFS= read -r site; do
        sites+=("${site}")
    done < <(awk '/^[^#]/ { for (i = 1; i <= NF; i++) print $i }' "${NGINX_SITE_CONF}")
    last=$(( ${#sites[@]} - 1 ))
    printf '{\n'
    printf '\t"data":['
    for (( idx = 0; idx <= last; idx++ )); do
        # The site is passed as an argument, not embedded in the format
        # string, so a '%' in it stays literal.
        printf '\n\t\t{"{#NGINX_SITE}":"%s"}' "${sites[idx]}"
        if (( idx < last )); then
            printf ','
        fi
    done
    printf '\n\t]\n'
    printf '}\n'
}

#######################################
# Fetch <site>/nginx_status and send the seven counters to the zabbix server.
# Globals:  HOST_NAME, NGINX_SITE, CURL, TIMEOUT, ZBX_SENDER, AGENT_CONF (read)
# Outputs:  the fetched status page and a progress message on stdout
# Exits:    1 when an argument is missing or the page is not exactly 4 lines
#######################################
function getNginxStatus() {
    local status_url status_file line_count
    local active reading writing waiting accepts handled requests

    if [ -z "${HOST_NAME}" ] || [ -z "${NGINX_SITE}" ]; then
        echo "ERROR: getNginxStatus needs a host name and a site URL."
        exit 1
    fi

    status_url="${NGINX_SITE}/nginx_status"
    # The fetched page is saved here; the name is derived from the site URL
    # with the scheme stripped and every '/' replaced by '_'.
    status_file="/tmp/nginx_status_$(printf '%s\n' "${NGINX_SITE}" | sed 's#^http.*://##; s#/#_#g').log"
    : > "${status_file}"
    # Fetch the page with curl; tee keeps a copy on stdout as well.
    "${CURL}" -s --connect-timeout "${TIMEOUT}" "${status_url}" 2>&1 | tee "${status_file}"
    line_count=$(wc -l < "${status_file}")
    # The page is accepted only when it has exactly four lines.
    if [ "${line_count}" -ne 4 ]; then
        echo "ERROR: ${status_file} is not correct."
        exit 1
    fi
    active=$(awk '/Active/ {print $NF}' "${status_file}")
    reading=$(awk '/Reading/ {print $2}' "${status_file}")
    writing=$(awk '/Writing/ {print $4}' "${status_file}")
    waiting=$(awk '/Waiting/ {print $6}' "${status_file}")
    accepts=$(awk 'NR == 3 {print $1}' "${status_file}")
    handled=$(awk 'NR == 3 {print $2}' "${status_file}")
    requests=$(awk 'NR == 3 {print $3}' "${status_file}")
    echo "Sending the data to zabbix server..."
    # zabbix_sender input format, one item per line: <hostname> <key> <value>
    "${ZBX_SENDER}" -c "${AGENT_CONF}" -i - << EOF
"${HOST_NAME}" "nginx_status[$NGINX_SITE,active]" "${active}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,reading]" "${reading}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,writing]" "${writing}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,waiting]" "${waiting}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,accepts]" "${accepts}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,handled]" "${handled}"
"${HOST_NAME}" "nginx_status[$NGINX_SITE,requests]" "${requests}"
EOF
}

[ $# -eq 0 ] && { echo "ERROR: The script needs at least one parameter."; exit 1; }

# Dispatch on the first argument; only the two known functions are callable.
case "${FUNCTION}" in
    nginxSiteDiscovery|getNginxStatus)
        "${FUNCTION}"
        ;;
    *)
        echo "ERROR: Bad parameters."
        exit 1
        ;;
esac
效果图
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
3.nagios 监控nginx状态(check_nginx_status.pl)
3.1 被监控端设置
测试脚本 ./check_nginx_status.pl -H 192.168.1.247 -s www.xxx.com -P 80
其中 192.168.1.247(原文红色标出)为nginx服务器的IP地址;www.xxx.com(原文绿色标出)为需要监控的网站网址。vi /usr/local/nagios/etc/nrpe.cfg 添加
command[check_nginx]=/usr/local/nagios/libexec/check_nginx_status.pl -H 192.168.1.247 -s www.xxx.com -P 80
删除 /tmp 下生成的 192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9 文件,不然后面会报错,提示无法生成文件。
rm /tmp/192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9
3.2 nagios主机的设置
/usr/local/nagios/libexec/check_nrpe -H 192.168.1.247 -c check_nginx
vi /usr/local/nagios/etc/nagios.cfg 添加
cfg_file=/usr/local/nagios/etc/objects/nginx.cfg
# Nagios object definitions for the monitored nginx machine (objects/nginx.cfg).

# Host entry. Replace the address placeholder with the IP of the monitored host.
define host{
        use                     linux-server
        host_name               nginx
        alias                   nginx
        address                 被监控端IP
        }

# check_command values of the form check_nrpe!<name> refer to commands that
# must be defined in nrpe.cfg on the monitored host.
define service{
        use                     generic-service
        host_name               nginx
        service_description     check-swap
        check_command           check_nrpe!check_swap
        }

define service{
        use                     generic-service
        host_name               nginx
        service_description     check-load
        check_command           check_nrpe!check_load
        }

define service{
        use                     generic-service
        host_name               nginx
        service_description     check-disk
        check_command           check_nrpe!check_sda1
        }

define service{
        use                     generic-service
        host_name               nginx
        service_description     check-users
        check_command           check_nrpe!check_users
        }

define service{
        use                     generic-service
        host_name               nginx
        service_description     total_procs
        check_command           check_nrpe!check_total_procs
        }

define service{
        use                     generic-service         ; Name of service template to use
        host_name               nginx
        service_description     PING
        check_command           check_ping!100.0,20%!500.0,60%
        }

# nginx status check, run through the check_nginx NRPE command;
# notifications are switched off for this service.
define service{
        use                     generic-service
        host_name               nginx
        service_description     nginx_status
        check_command           check_nrpe!check_nginx!
        notifications_enabled   0
        }
3.3 nagios服务器报错
NGINX UNKNOWN - unable to write temporary data in :/tmp/192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9
解决方法:删除被监控主机 /tmp 下的文件 192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9
rm /tmp/192.168.1.247_check_nginx_status8d727909e5ace94dc547c3af50af6cb9
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
4.zabbix 开源监控系列三(自定义key监控nginx_status)
1. 每台nginx server的主配置文件 都配置status模块
2. 在一台nginx server 获取到nginx status的值,写到本地一个临时文件
3. 按需求处理临时文件,使用UserParameter = nginx_status[ * ] 自定义key值的方式,
4.crontab 每分钟获取一次nginx status值
以下只在监控机上去做 1. 脚本
#!/bin/bash
#
# Collect the nginx status page of web1..web3 into /tmp/ngst so that the
# zabbix agent can read the values from local files.
# Files written per host N: webN (raw page) and webN_status (one
# "<label> <value>" line each for Active, Reading, Writing and Waiting),
# plus "total" holding the sum of all Active values.

readonly NGST_DIR=/tmp/ngst
readonly WEB_COUNT=3

# The original script assumed the directory already existed; create it so the
# redirections below cannot fail on a fresh machine.
mkdir -p "${NGST_DIR}" || exit 1
# Start from a clean directory; ':?' aborts instead of expanding to "/*"
# should the variable ever be empty.
rm -rf "${NGST_DIR:?}"/*

# Download the status page of every web host into ${NGST_DIR}/webN.
function GETSTATUS {
  local n
  for (( n = 1; n <= WEB_COUNT; n++ )); do
    curl "http://web${n}:8080/ngst" > "${NGST_DIR}/web${n}" 2> /dev/null
  done
}

# Reduce every raw page to "<label> <value>" lines in ${NGST_DIR}/webN_status.
function HANDLE {
  local n
  for (( n = 1; n <= WEB_COUNT; n++ )); do
    awk '
      /Active/  { print $1, $NF }
      /Reading/ { print $1, $2 }
      /Writing/ { print $3, $4 }
      /Waiting/ { print $5, $6 }
    ' "${NGST_DIR}/web${n}" >> "${NGST_DIR}/web${n}_status"
    #mv /tmp/ngst/web$i.new /tmp/ngst/web$i
  done
}

# Sum the Active value of all hosts into ${NGST_DIR}/total
# (total number of active connections).
function TOTAL {
  awk '/Active/ { total += $NF } END { print total }' "${NGST_DIR}"/*status \
    > "${NGST_DIR}/total"
}

GETSTATUS
HANDLE
TOTAL

# Step 2 - zabbix agent side: only the agent configuration file has to be
# changed; add the UserParameter lines that follow (familiarity with Zabbix
# custom keys is assumed).
# Custom keys served from the files the collector script writes to /tmp/ngst.
# The per-host keys read webN_status, which holds one "<label> <value>" line
# per counter, so $NF is the value of the requested counter. In the raw webN
# page Reading, Writing and Waiting share one line, and $NF there is always
# the Waiting value.
UserParameter=nginx_status.total,cat /tmp/ngst/total
UserParameter=nginx_status[*],grep "$1" /tmp/ngst/web1_status | awk '{print $NF}'
UserParameter=nginx2_status[*],grep "$1" /tmp/ngst/web2_status | awk '{print $NF}'
UserParameter=nginx3_status[*],grep "$1" /tmp/ngst/web3_status | awk '{print $NF}'
# These few lines are enough to monitor Active, Reading and the other values.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
|
5.zabbix_sender 主动上传k/v监控nginx日志状态码
http://benpaozhe.blog.51cto.com/10239098/1904654
在items多了后,有一些速度慢的items如果不采用主动模式,会把server端拖死。zabbix_sender其实是一种变相的主动模式,配合计划任务,主动将k/v上传到zabbix。现以zabbix_sender监控nginx日志状态码为例,抛砖引玉做下介绍
1.agent 端编写脚本和计划任务
需求是监控nginx日志的 200、400、401、403、404、499、502、503、504 状态码,按分钟进行数量统计上报。编写脚本如下
#!/bin/bash
#
# nginxlog.sh - count the HTTP status codes nginx logged during the previous
# minute and push the counts to the zabbix server with zabbix_sender.
# Each pushed line has the form "<hostname> nginx_stat<code> <count>".

access_log=/data1/ms/comos/logs/access.log   # nginx access log
last_min_log=/tmp/last_min_log               # scratch file: the previous minute's log lines
senderfile=/tmp/sender_file                  # key/value file handed to zabbix_sender
zabbix_sender=/usr/local/zabbix/bin/zabbix_sender
agent_conf=/usr/local/zabbix/etc/zabbix_agentd.conf
status_codes="200 400 401 403 404 499 502 503 504"

#######################################
# Print one "<host> nginx_stat<code> <count>" line per requested status code.
# A code that never occurs is reported with count 0.
# Arguments: $1 - host name (must match the host name known to the server)
#            $2 - space-separated list of status codes
# Input:     access-log lines on stdin; the status code is taken as the first
#            word after the first double-quoted field of a line
#######################################
count_status_codes() {
  awk -F'"' -v host="$1" -v codes="$2" '
    { split($3, word, " "); seen[word[1]]++ }
    END {
      n = split(codes, code, " ")
      for (i = 1; i <= n; i++)
        printf "%s nginx_stat%s %d\n", host, code[i], seen[code[i]]
    }'
}

host_name=$(hostname)
# Timestamp of the previous minute as nginx writes it (dd/Mon/yyyy:HH:MM).
# Day and month are part of the pattern so that the same minute of another
# day is not counted; LC_ALL=C keeps the month abbreviation in English.
last_min=$(LC_ALL=C date -d '1 minute ago' '+%d/%b/%Y:%H:%M')

# At 500-800 requests per second, the last 60000 lines cover the previous
# minute; reading only the tail keeps the run cheap.
tail -n 60000 "${access_log}" | grep -F -- "${last_min}" > "${last_min_log}"

# Rewrite the sender file from scratch on every run.
count_status_codes "${host_name}" "${status_codes}" < "${last_min_log}" > "${senderfile}"

# Finally send the values to the server.
"${zabbix_sender}" -c "${agent_conf}" -i "${senderfile}"
添加到计划任务: */1 * * * * /usr/local/zabbix/script/nginxlog.sh 2>&1
2. 配置server端和grafana进行绘图
配置项如下,将 type 设置为Zabbix trapper
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
|
6.zabbix 监控nginx性能状态
1. 编写脚本来获取nginx的相关信息
vim /usr/local/zabbix/scripts/nginx-check_performance.sh
#!/bin/bash
##################################
# Zabbix monitoring script
#
# nginx:
#  - anything available via nginx stub-status module
#
##################################
# Contact:
#  vincent.viallet@gmail.com
#
# Usage: nginx-check_performance.sh <metric> [status-url]
#   <metric> is one of: active_connections, accepted_connections,
#   handled_connections, handled_requests, reading, writing, waiting

# Zabbix requested parameter
ZBX_REQ_DATA="$1"
ZBX_REQ_DATA_URL="$2"

# Nginx defaults (put the domain of your own site here)
NGINX_STATUS_DEFAULT_URL="www.baidu.com/nginx-status"
WGET_BIN="/usr/bin/wget"

#
# Error handling:
#  - need to be displayable in Zabbix (avoid NOT_SUPPORTED)
#  - items need to be of type "float" (allow negative + float)
#
ERROR_NO_ACCESS_FILE="-0.9900"
ERROR_NO_ACCESS="-0.9901"
ERROR_WRONG_PARAM="-0.9902"
ERROR_DATA="-0.9903" # either can not connect / bad host / bad port

# Handle host and port if non-default
if [ -n "$ZBX_REQ_DATA_URL" ]; then
  URL="$ZBX_REQ_DATA_URL"
else
  URL="$NGINX_STATUS_DEFAULT_URL"
fi

# save the nginx stats in a variable for future parsing
NGINX_STATS=$("$WGET_BIN" -q "$URL" -O - 2> /dev/null)

# error during retrieve ($? still holds the exit status of wget here)
if [ $? -ne 0 ] || [ -z "$NGINX_STATS" ]; then
  echo "$ERROR_DATA"
  exit 1
fi

#
# Extract data from nginx stats
#
case "$ZBX_REQ_DATA" in
  active_connections)   printf '%s\n' "$NGINX_STATS" | head -n 1 | cut -f3 -d' ' ;;
  accepted_connections) printf '%s\n' "$NGINX_STATS" | grep -Ev '[a-zA-Z]' | cut -f2 -d' ' ;;
  handled_connections)  printf '%s\n' "$NGINX_STATS" | grep -Ev '[a-zA-Z]' | cut -f3 -d' ' ;;
  handled_requests)     printf '%s\n' "$NGINX_STATS" | grep -Ev '[a-zA-Z]' | cut -f4 -d' ' ;;
  reading)              printf '%s\n' "$NGINX_STATS" | tail -n 1 | cut -f2 -d' ' ;;
  writing)              printf '%s\n' "$NGINX_STATS" | tail -n 1 | cut -f4 -d' ' ;;
  waiting)              printf '%s\n' "$NGINX_STATS" | tail -n 1 | cut -f6 -d' ' ;;
  *) echo "$ERROR_WRONG_PARAM"; exit 1 ;;
esac

exit 0
[root@ittestserver1 opt]# chmod +x /usr/local/zabbix/scripts/nginx-check_performance.sh
-rw-r--r-x 1 root root 1645 2月 4 14:26 /usr/local/zabbix/scripts/nginx-check_performance.sh
2. 配置zabbix_agentd.conf。启用UserParameter,并配置相关的参数
[root@ittestserver1 opt]# vim /usr/local/zabbix/etc/zabbix_agentd.conf
####### USER-DEFINED MONITORED PARAMETERS #######

### Option: UnsafeUserParameters
#	Allow all characters to be passed in arguments to user-defined parameters.
#	The following characters are not allowed:
#	\ ' " ` * ? [ ] { } ~ $ ! & ; ( ) < > | # @
#	Additionally, newline characters are not allowed.
#	0 - do not allow
#	1 - allow
#
# Mandatory: no
# Range: 0-1
# Default:
# UnsafeUserParameters=0
UnsafeUserParameters=1

### Option: UserParameter
#	User-defined parameter to monitor. There can be several user-defined parameters.
#	Format: UserParameter=<key>,<shell command>
#	See 'zabbix_agentd' directory for examples.
#
# Mandatory: no
# Default:
# UserParameter=
UserParameter=nginx[*],/usr/local/zabbix/scripts/nginx-check_performance.sh "$1"
3.zabbix_get -s 10.253.17.20 -p 10050 -k "nginx[reading]"
|
名称 |
描述 |
|
Accepts(接受) |
NGINX 所接受的客户端连接数 |
资源: 功能 |
Handled(已处理) |
成功的客户端连接数 |
资源: 功能 |
Active(活跃) |
当前活跃的客户端连接数 |
资源: 功能 |
Dropped(已丢弃,计算得出) |
丢弃的连接数(接受 - 已处理) |
工作:错误* |
Requests(请求数) |
客户端请求数 |
工作:吞吐量 |
NGINX worker 进程接受 OS 的连接请求时 Accepts 计数器增加,而Handled 是当实际的请求得到连接时(通过建立一个新的连接或重新使用一个空闲的)。这两个计数器的值通常都是相同的,如果它们有差别则表明连接被Dropped, 往往这是由于资源限制,比如已经达到 NGINX 的worker_connections的限制.
备注:
Active :当前活跃的连接数。
Accepts: 接受的客户端连接数
Handled: 已处理的连接数(服务器正常时,这两项应该相等)
Requests: 客户端请求总数(吞吐量)
Reading: 当接收到请求时,连接离开 Waiting 状态,并且该请求本身使 Reading 状态计数增加。在这种状态下 NGINX 会读取客户端请求首部。请求首部是比较小的,因此这通常是一个快速的操作。
Writing: 请求被读取之后,其使 Writing 状态计数增加,并保持在该状态,直到响应返回给客户端。这意味着,该请求在 Writing 状态时, 一方面 NGINX 等待来自上游系统的结果(系统放在 NGINX “后面”),另外一方面,NGINX 也在同时响应。请求往往会在 Writing 状态花费大量的时间。
Waiting: 活跃的连接也可以处于 Waiting 子状态,即此刻该连接上没有活跃的请求。新连接可以绕过这个状态并直接进入 Reading 状态,最常见的是在使用“accept filter(接受过滤器)” 和 “deferred accept(延迟接受)”时,在这种情况下,NGINX 不会接收 worker 进程的通知,直到它具有足够的数据才开始响应。如果连接设置为 keep-alive ,那么它在发送响应后将处于等待状态
writing,waiting这两个特别注意
本文转自 liqius 51CTO博客,原文链接:http://blog.51cto.com/szgb17/1909301,如需转载请自行联系原作者