1.环境要求:
内核要求: 官方要求Linux 2.6.32-696.6.3.el6.x86_64及以上版本
配置:
软件存放目录:/data/soft
应用安装目录:/data/services
应用名称及版本:Prometheus 2.5 alertmanager:0.15.3 blackbox插件:0.13.0
2.应用部署
2.1.导入应用程序
复制 应用官方下载地址 https://prometheus.io/download/ ,根据操作系统,下载 prometheus-2.5.0.linux-amd64,alertmanager-0.15.3.linux-amd64,blackbox_exporter-0.13.0.linux-amd64,将安装包上传至服务器/data/soft 目录:
复制 [root@prome-devops01cn ~]# cd /data/soft/
[root@prome-devops01cn soft]# ls
alertmanager-0.15.3.linux-amd64.tar.gz
blackbox_exporter-0.13.0.linux-amd64.tar.gz
2.2.部署prometheus
2.2.1.解压prometheus
复制 [root@prome-devops01cn soft]# tar xvf prometheus-2.5.0.linux-amd64.tar.gz -C /data/services/
[root@prome-devops01cn soft]# cd /data/services/prometheus-2.5.0.linux-amd64/
[root@prome-devops01cn prometheus-2.5.0.linux-amd64]# ls
LICENSE NOTICE console_libraries consoles prometheus prometheus.yml promtool
主配置文件为prometheus.yml,启动文件为prometheus
2.2.2.查看默认配置文件
复制 [root@prome-devops01cn prometheus-2.5.0.linux-amd64]# cat prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      - 127.0.0.1:9093 #报警组件alertmanager地址(在下一节说明alertmanager部署)
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "alert_rules.yml" #指定报警表达式配置文件
  # - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
    - targets: ['localhost:9090']
2.2.3.启动prometheus服务测试
复制 [root@prome-devops01cn prometheus-2.5.0.linux-amd64]# ./prometheus --config.file=prometheus.yml --storage.tsdb.path=/data/prometheus/data --web.enable-lifecycle --web.enable-admin-api
level=info ts=2018-12-11T06:58:03.435600341Z caller=main.go:245 build_context="(go=go1.11.1, user=root@578ab108d0b9, date=20181106-11:40:44)"
level=info ts=2018-12-11T06:58:03.435622783Z caller=main.go:246 host_details="(Linux 2.6.32-696.6.3.el6.x86_64 #1 SMP Wed Jul 12 14:17:22 UTC 2017 x86_64 prome-devops01cn.pek3.example.net (none))"
level=info ts=2018-12-11T06:58:03.435642981Z caller=main.go:247 fd_limits="(soft=65535, hard=65535)"
level=info ts=2018-12-11T06:58:03.435658441Z caller=main.go:248 vm_limits="(soft=unlimited, hard=unlimited)"
level=info ts=2018-12-11T06:58:03.45888677Z caller=web.go:399 component=web msg="Start listening for connections" address=0.0.0.0:9090
level=info ts=2018-12-11T06:58:03.480182187Z caller=main.go:562 msg="Starting TSDB ..."
level=info ts=2018-12-11T06:58:03.484976222Z caller=main.go:572 msg="TSDB started"
level=info ts=2018-12-11T06:58:03.48501841Z caller=main.go:632 msg="Loading configuration file" filename=prometheus.yml
level=info ts=2018-12-11T06:58:03.485957364Z caller=main.go:658 msg="Completed loading of configuration file" filename=prometheus.yml
level=info ts=2018-12-11T06:58:03.48597572Z caller=main.go:531 msg="Server is ready to receive web requests."
出现上面的提示,说明prometheus服务启动正常
--config.file 指定配置文件
--storage.tsdb.path 指定数据存储路径
--web.enable-lifecycle 允许运行期间通过接口进行关闭和重新加载服务
--web.enable-admin-api 允许使用api
2.2.4.启动服务
复制 [root@prome-devops01cn prometheus-2.5.0.linux-amd64]# ./prometheus --config.file=prometheus.yml --storage.tsdb.path=/data/prometheus/data --web.enable-lifecycle --web.enable-admin-api &
查看启动端口,默认9090
[root@prome-devops01cn prometheus-2.5.0.linux-amd64]# netstat -nplt|grep 9090
tcp 0 0 0.0.0.0:9090 0.0.0.0:* LISTEN 7898/./prometheus
出现端口说明启动成功
2.2.5.访问prometheus
浏览器输入{ip}:9090
2.3.部署alertmanager
2.3.1.解压alertmanager
复制 [root@prome-devops01cn prometheus-2.5.0.linux-amd64]# cd /data/soft/
[root@prome-devops01cn soft]# ls
alertmanager-0.15.3.linux-amd64.tar.gz blackbox_exporter-0.13.0.linux-amd64.tar.gz prometheus-2.5.0.linux-amd64.tar.gz
[root@prome-devops01cn soft]# tar xvf alertmanager-0.15.3.linux-amd64.tar.gz -C /data/services/
[root@prome-devops01cn ~]# cd /data/services/alertmanager-0.15.3.linux-amd64/
[root@prome-devops01cn alertmanager-0.15.3.linux-amd64]# ls
LICENSE NOTICE alertmanager alertmanager.yml amtool
alertmanager为启动文件
alertmanager.yml为报警配置文件
2.3.2.查看配置文件
复制 [root@prome-devops01cn alertmanager-0.15.3.linux-amd64]# cat alertmanager.yml
global:
  resolve_timeout: 5m
route:
  group_by: ['alertname']
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: 'web.hook'
receivers:
- name: 'web.hook'
  webhook_configs:
  - url: 'http://127.0.0.1:5001/'
inhibit_rules:
  - source_match:
      severity: 'critical'
    target_match:
      severity: 'warning'
    equal: ['alertname', 'dev', 'instance']
该配置文件设置报警相关策略,根据实际情况进行配置
2.3.3.启动测试
复制 [root@prome-devops01cn alertmanager-0.15.3.linux-amd64]# ./alertmanager --config.file="alertmanager.yml" --storage.path="/data/alertmanager/data"
level=info ts=2018-12-11T10:17:55.059583568Z caller=main.go:174 msg="Starting Alertmanager" version="(version=0.15.3, branch=HEAD, revision=d4a7697cc90f8bce62efe7c44b63b542578ec0a1)"
level=info ts=2018-12-11T10:17:55.059660154Z caller=main.go:175 build_context="(go=go1.11.2, user=root@4ecc17c53d26, date=20181109-15:40:48)"
level=info ts=2018-12-11T10:17:55.062711689Z caller=cluster.go:155 component=cluster msg="setting advertise address explicitly" addr=10.41.11.30
level=info ts=2018-12-11T10:17:55.065446537Z caller=main.go:322 msg="Loading configuration file" file=alertmanager.yml
level=info ts=2018-12-11T10:17:55.06549233Z caller=cluster.go:570 component=cluster msg="Waiting for gossip to settle..." interval=2s
level=info ts=2018-12-11T10:17:55.068726117Z caller=main.go:398 msg=Listening address=:9093
level=info ts=2018-12-11T10:17:57.065690074Z caller=cluster.go:595 component=cluster msg="gossip not settled" polls=0 before=0 now=1 elapsed=2.000103415s
level=info ts=2018-12-11T10:18:05.066282505Z caller=cluster.go:587 component=cluster msg="gossip settled; proceeding" elapsed=10.000715033s
出现上面提示,说明alertmanager服务启动正常
--config.file 指定配置文件
--storage.path 指定数据存储路径
2.3.4.启动服务
[root@prome-devops01cn alertmanager-0.15.3.linux-amd64]# ./alertmanager --config.file="alertmanager.yml" --storage.path="/data/alertmanager/data" &
查看启动端口,默认9093
[root@prome-devops01cn alertmanager-0.15.3.linux-amd64]# netstat -napl|grep 9093
tcp 0 0 0.0.0.0:9093 0.0.0.0:* LISTEN 7921/./alertmanager
2.3.5.访问alertmanager
浏览器输入{ip}:9093
2.4.部署blackbox_exporter
blackbox_exporter作为prometheus的监控组件,主要用于ICMP,http,https,DNS等协议的监控
2.4.1.解压blackbox_exporter
复制 [root@prome-devops01cn soft]# tar xvf blackbox_exporter-0.13.0.linux-amd64.tar.gz -C /data/services/
[root@prome-devops01cn soft]# cd /data/services/blackbox_exporter-0.13.0.linux-amd64/
[root@prome-devops01cn blackbox_exporter-0.13.0.linux-amd64]# ls
LICENSE NOTICE blackbox.yml blackbox_exporter
主配置文件为blackbox.yml,启动文件为blackbox_exporter
2.4.2.查看默认配置文件
复制 [root@prome-devops01cn blackbox_exporter-0.13.0.linux-amd64]# cat blackbox.yml
modules:
  http_2xx:
    prober: http
    http:
  http_post_2xx:
    prober: http
    http:
      method: POST
  tcp_connect:
    prober: tcp
  pop3s_banner:
    prober: tcp
    tcp:
      query_response:
      - expect: "^+OK"
      tls: true
      tls_config:
        insecure_skip_verify: false
  ssh_banner:
    prober: tcp
    tcp:
      query_response:
      - expect: "^SSH-2.0-"
  irc_banner:
    prober: tcp
    tcp:
      query_response:
      - send: "NICK prober"
      - send: "USER prober prober prober :prober"
      - expect: "PING :([^ ]+)"
        send: "PONG ${1}"
      - expect: "^:[^ ]+ 001"
  icmp:
    prober: icmp
默认配置已满足监控http协议的需求,因此不需要进行修改
2.4.3.启动测试
复制 [root@prome-devops01cn blackbox_exporter-0.13.0.linux-amd64]# ./blackbox_exporter --config.file="blackbox.yml"
level=info ts=2018-12-11T14:13:23.617132472Z caller=main.go:215 msg="Starting blackbox_exporter" version="(version=0.13.0, branch=HEAD, revision=1cfb7512daa7e100abb32037996c8f805990d813)"
level=info ts=2018-12-11T14:13:23.617578759Z caller=main.go:228 msg="Loaded config file"
level=info ts=2018-12-11T14:13:23.617670962Z caller=main.go:332 msg="Listening on address" address=:9115
出现上面提示,说明blackbox_exporter服务启动正常
--config.file 指定配置文件
2.4.4.启动服务
复制 [root@prome-devops01cn blackbox_exporter-0.13.0.linux-amd64]# ./blackbox_exporter --config.file="blackbox.yml" &
查看启动端口,默认9115
[root@prome-devops01cn blackbox_exporter-0.13.0.linux-amd64]# netstat -nplt|grep 9115
tcp 0 0 0.0.0.0:9115 0.0.0.0:* LISTEN 8302/./blackbox_exp
出现端口说明启动成功
2.4.5.访问metric
浏览器输入{ip}:9115
3.配置演示
3.1.配置prometheus监控URL
3.1.1.增加prometheus配置
调用blackbox_exporter监控url
复制 scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
    - targets: ['localhost:9090']
  - job_name: 'blackbox'
    metrics_path: /probe
    params:
      module: [http_2xx]
    static_configs:
    - targets:
      - www.baidu.com
    relabel_configs:
    - source_labels: [__address__]
      target_label: __param_target
    - source_labels: [__param_target]
      target_label: instance
    - target_label: __address__
      replacement: 127.0.0.1:9115
完成配置后,需重新加载配置文件
复制 [root@prome-devops01cn prometheus-2.5.0.linux-amd64]# curl -X POST http://127.0.0.1:9090/-/reload
level=info ts=2018-12-11T14:41:12.266612849Z caller=main.go:632 msg="Loading configuration file" filename=prometheus.yml
level=info ts=2018-12-11T14:41:12.267220069Z caller=main.go:658 msg="Completed loading of configuration file" filename=prometheus.yml
没有报错,说明新配置成功加载
3.1.2.验证监控
web打开prometheus查看"probe_http_status_code" metric
可以看到监控的百度地址,返回状态结果200,正常
3.2.添加监控报警值
在prometheus根目录设置prometheus报警文件alert_rules.yml(可见prometheus部署小节),该配置由主配置文件的
复制 rule_files:
  - "alert_rules.yml"
指定,相对路径为prometheus部署路径
[root@prome-devops01cn prometheus-2.5.0.linux-amd64]# cat alert_rules.yml
groups:
- name: blackbox_exporter.rules
  rules:
  - alert: urlStatusDown
    expr: probe_http_status_code{job="blackbox"} > 200 #表达式判断url响应码大于200报警
    for: 1m
    labels:
      severity: critical
    annotations:
      description: '{{$labels.job}} | {{$labels.instance}} down | value: {{$value}}.'
      summary: '{{$labels.instance}}'
配置完成后,重新加载配置文件
复制 [root@prome-devops01cn prometheus-2.5.0.linux-amd64]# curl -X POST http://127.0.0.1:9090/-/reload
level=info ts=2018-12-11T14:48:59.580411965Z caller=main.go:632 msg="Loading configuration file" filename=prometheus.yml
level=info ts=2018-12-11T14:48:59.581377255Z caller=main.go:658 msg="Completed loading of configuration file" filename=prometheus.yml
加载无误后,查看prometheus alert
在alert选项中可以看到刚刚添加的报警项,报警方式需要根据实际情况进行配置