1. 拉取镜像
docker pull prom/alertmanager
docker pull prom/prometheus
docker pull google/cadvisor
docker pull prom/node-exporter
docker pull grafana/grafana
2. 运行cadvisor
docker run \
--volume=/:/rootfs:ro \
--volume=/var/run:/var/run:rw \
--volume=/sys:/sys:ro \
--volume=/var/lib/docker/:/var/lib/docker:ro \
--volume=/dev/disk/:/dev/disk:ro \
--publish=8080:8080 \
--detach=true \
--name=cadvisor \
google/cadvisor:latest
3. 运行prom/node-exporter
docker run -d --name=node-exporter -p 9100:9100 \
-v "/proc:/host/proc:ro" \
-v "/sys:/host/sys:ro" \
-v "/:/rootfs:ro" \
--net="host" prom/node-exporter
4. 配置和运行prom/prometheus
mkdir /opt/prometheus
----------------------
[root@salt-st2 prometheus]# cat /opt/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).
# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 10.28.0.222:9093
# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules.yml"
  # - "second_rules.yml"
# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=` to any timeseries scraped from this config.
  - job_name: 'localhost'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      - targets: ['localhost:9090']
  - job_name: '10.28.0.222'
    static_configs:
      - targets: ['10.28.0.222:9100']
  - job_name: 'Mesos'
    static_configs:
      - targets: ['10.28.0.200:9100','10.28.0.201:9100','10.28.0.202:9100']
  - job_name: 'Container'
    static_configs:
      - targets: ['10.28.0.222:8080','10.28.0.201:8080','10.28.0.202:8080']
-----------------------
----------------
[root@salt-st2 prometheus]# cat /opt/prometheus/rules.yml
groups:
  - name: DOWN
    rules:
      # Alert for any instance that is unreachable for >1 minute.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        # labels:
        #   severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
---------------------
docker run --name prom -d -p 9090:9090 -v /opt/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml -v /opt/prometheus:/prometheus prom/prometheus
docker exec -it prom sh
cp /prometheus/rules.yml /etc/prometheus/
docker restart prom
5. 配置和运行prom/alertmanager
mkdir /opt/alertmanager
-----------------
[root@salt-st2 alertmanager]# cat /opt/alertmanager/config.yml
global:
  # The smarthost and SMTP sender used for mail notifications.
  smtp_smarthost: 'smtp.163.com:25'
  smtp_from: 'xxxxx@163.com'
  smtp_auth_username: 'xxxxx@163.com'
  smtp_auth_password: 'xxxx'
  smtp_require_tls: false
  # The auth token for Hipchat.
  # hipchat_auth_token: '1234556789'
  # Alternative host for Hipchat.
  # hipchat_api_url: ''
# The directory from which notification templates are read.
templates:
  - '/etc/alertmanager/template/*.tmpl'
# The root route on which each incoming alert enters.
route:
  # The labels by which incoming alerts are grouped together. For example,
  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
  # be batched into a single group.
  group_by: ['alertname', 'cluster', 'service']
  # When a new group of alerts is created by an incoming alert, wait at
  # least 'group_wait' to send the initial notification.
  # This way ensures that you get multiple alerts for the same group that start
  # firing shortly after another are batched together on the first
  # notification.
  group_wait: 30s
  # When the first notification was sent, wait 'group_interval' to send a batch
  # of new alerts that started firing for that group.
  group_interval: 5m
  # If an alert has successfully been sent, wait 'repeat_interval' to
  # resend them.
  repeat_interval: 3h
  # A default receiver
  receiver: devops
  # All the above attributes are inherited by all child routes and can
  # be overwritten on each.
receivers:
  - name: 'devops'
    email_configs:
      - to: '42143xxxx@qq.com'
----------------------------
docker run -d -p 9093:9093 --name=alertmanager -v /opt/alertmanager:/alertmanager -v /opt/alertmanager/config.yml:/etc/alertmanager/config.yml prom/alertmanager
docker exec -it alertmanager sh
mkdir /etc/alertmanager/template
下载邮件模板文件 email.html 到这个目录(/etc/alertmanager/template)
cp email.html email.default.html
docker restart alertmanager
6. 运行grafana
mkdir /opt/grafana
docker run -d -p 3000:3000 --name=grafana -v /opt/grafana:/var/lib/grafana grafana/grafana
打开浏览器,访问 http://<宿主机IP>:3000
使用默认账号密码(admin/admin)登录后,添加prometheus数据源(URL 填 http://<宿主机IP>:9090)