Chinaunix首页 | 论坛 | 博客
  • 博客访问: 368738
  • 博文数量: 100
  • 博客积分: 1894
  • 博客等级: 上尉
  • 技术积分: 951
  • 用 户 组: 普通用户
  • 注册时间: 2007-12-11 23:17
文章分类

全部博文(100)

文章存档

2018年(3)

2014年(2)

2013年(7)

2012年(10)

2011年(8)

2010年(6)

2009年(63)

2008年(1)

分类: 系统运维

2018-02-12 21:30:35

1. 拉取镜像
docker pull prom/alertmanager
docker pull prom/prometheus
docker pull google/cadvisor
docker pull prom/node-exporter
docker pull grafana/grafana

2. 运行cadvisor
docker run \
  --volume=/:/rootfs:ro \
  --volume=/var/run:/var/run:rw \
  --volume=/sys:/sys:ro \
  --volume=/var/lib/docker/:/var/lib/docker:ro \
  --volume=/dev/disk/:/dev/disk:ro \
  --publish=8080:8080 \
  --detach=true \
  --name=cadvisor \
  google/cadvisor:latest

3. 运行prom/node-exporter
docker run -d --name=node-exporter -p 9100:9100 \
  -v "/proc:/host/proc:ro" \
  -v "/sys:/host/sys:ro" \
  -v "/:/rootfs:ro" \
  --net="host" \
  prom/node-exporter

4. 配置和运行prom/prometheus
mkdir /opt/prometheus
----------------------
[root@salt-st2 prometheus]# cat /opt/prometheus/prometheus.yml
# my global config
global:
  scrape_interval: 15s # Set the scrape interval to every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - 10.28.0.222:9093

# Load rules once and periodically evaluate them according to the global 'evaluation_interval'.
rule_files:
  - "rules.yml"
  # - "second_rules.yml"

# A scrape configuration containing exactly one endpoint to scrape:
# Here it's Prometheus itself.
scrape_configs:
  # The job name is added as a label `job=` to any timeseries scraped from this config.
  - job_name: 'localhost'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.

    static_configs:
      - targets: ['localhost:9090']

  - job_name: '10.28.0.222'
    static_configs:
      - targets: ['10.28.0.222:9100']


  - job_name: 'Mesos'
    static_configs:
      - targets: ['10.28.0.200:9100','10.28.0.201:9100','10.28.0.202:9100']

  - job_name: 'Container'
    static_configs:
      - targets: ['10.28.0.222:8080','10.28.0.201:8080','10.28.0.202:8080']
-----------------------
----------------
[root@salt-st2 prometheus]# cat /opt/prometheus/rules.yml
groups:
  - name: DOWN
    rules:

      # Alert for any instance that is unreachable for >1 minute.
      - alert: InstanceDown
        expr: up == 0
        for: 1m
        # labels:
        #   severity: page
        annotations:
          summary: "Instance {{ $labels.instance }} down"
          description: "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 1 minute."
---------------------

docker run --name prom -d -p 9090:9090 -v /opt/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml -v /opt/prometheus:/prometheus prom/prometheus

docker exec -it prom sh
cp /prometheus/rules.yml /etc/prometheus/
docker restart prom

5. 配置和运行prom/alertmanager
mkdir /opt/alertmanager
-----------------
[root@salt-st2 alertmanager]# cat /opt/alertmanager/config.yml
global:
  # The smarthost and SMTP sender used for mail notifications.
  smtp_smarthost: 'smtp.163.com:25'
  smtp_from: 'xxxxx@163.com'
  smtp_auth_username: 'xxxxx@163.com'
  smtp_auth_password: 'xxxx'
  smtp_require_tls: false
  # The auth token for Hipchat.
  # hipchat_auth_token: '1234556789'
  # Alternative host for Hipchat.
  # hipchat_api_url: ''

# The directory from which notification templates are read.
templates:
  - '/etc/alertmanager/template/*.tmpl'

# The root route on which each incoming alert enters.
route:
  # The labels by which incoming alerts are grouped together. For example,
  # multiple alerts coming in for cluster=A and alertname=LatencyHigh would
  # be batched into a single group.
  group_by: ['alertname', 'cluster', 'service']

  # When a new group of alerts is created by an incoming alert, wait at
  # least 'group_wait' to send the initial notification.
  # This way ensures that you get multiple alerts for the same group that start
  # firing shortly after another are batched together on the first
  # notification.
  group_wait: 30s

  # When the first notification was sent, wait 'group_interval' to send a batch
  # of new alerts that started firing for that group.
  group_interval: 5m

  # If an alert has successfully been sent, wait 'repeat_interval' to
  # resend them.
  repeat_interval: 3h

  # A default receiver
  receiver: devops

  # All the above attributes are inherited by all child routes and can
  # be overwritten on each.

receivers:
  - name: 'devops'
    email_configs:
      - to: '42143xxxx@qq.com'
----------------------------

docker run -d -p 9093:9093 --name=alertmanager -v /opt/alertmanager:/alertmanager -v /opt/alertmanager/config.yml:/etc/alertmanager/config.yml prom/alertmanager

docker exec -it alertmanager sh
mkdir /etc/alertmanager/template
下载如下文件到这个目录
cp email.html email.default.html
docker restart alertmanager

6. 运行grafana
mkdir /opt/grafana
docker run -d -p 3000:3000 --name=grafana -v /opt/grafana:/var/lib/grafana grafana/grafana

打开浏览器，访问 http://<主机IP>:3000
使用默认账号密码（admin/admin）登录后添加prometheus数据源




阅读(2378) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~