最近由于一些不明原因,storm集群(好几个项目在用)中有些任务不能正常启动执行,所以想搭建一个监控系统,把storm集群的状态监控起来并做相应的报警处理。最后选定了开源的监控平台ganglia。查阅相关资料后得知,ganglia可以借助jmxtrans来对storm进行监控,jmxtrans充当ganglia与storm之间的通信桥梁。
1. 下载并安装jmxtrans
可以使用wget下载(原文此处的下载链接已丢失,请替换为jmxtrans RPM包的实际下载地址):
wget <jmxtrans-20121016.145842.6a28c97fbb-0.noarch.rpm的下载地址>
sudo rpm -i jmxtrans-20121016.145842.6a28c97fbb-0.noarch.rpm
安装好后的jmxtrans目录在/usr/share/jmxtrans,配置文件在/etc/sysconfig/jmxtrans目录,监控storm的json脚本放在/var/lib/jmxtrans目录。
2. 修改storm配置
在storm配置文件storm.yaml中加入如下两个参数:
supervisor.childopts: "-verbose:gc -XX:+PrintGCTimeStamps -XX:+PrintGCDetails -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=12346"
nimbus.childopts: "-verbose:gc -XX:+PrintGCTimeStamps -XX:+PrintGCDetails -Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.port=12345"
端口号12345和12346可以自行设置,但必须选用当前未被其他服务占用的端口号
3. 编写监控脚本
supervisor.json
{
  "servers" : [ {
    "port" : "12346",
    "host" : "IP_OF_SUPERVISOR_MACHINE",
    "queries" : [ {
      "outputWriters": [{
        "@class":"com.googlecode.jmxtrans.model.output.GangliaWriter",
        "settings": {
          "groupName": "supervisor",
          "host": "IP_OF_GANGLIA_GMOND_SERVER",
          "port": "8649" }
      }],
      "obj": "java.lang:type=Memory",
      "resultAlias": "supervisor",
      "attr": ["ObjectPendingFinalizationCount"]
    },
    {
      "outputWriters": [{
        "@class":"com.googlecode.jmxtrans.model.output.GangliaWriter",
        "settings": {
          "groupName": "supervisor",
          "host": "IP_OF_GANGLIA_GMOND_SERVER",
          "port": "8649"
        }
      }],
      "obj": "java.lang:name=Copy,type=GarbageCollector",
      "resultAlias": "supervisor",
      "attr": [
        "CollectionCount",
        "CollectionTime"
      ]
    },
    {
      "outputWriters": [{
        "@class": "com.googlecode.jmxtrans.model.output.GangliaWriter",
        "settings": {
          "groupName": "supervisor",
          "host": "IP_OF_GANGLIA_GMOND_SERVER",
          "port": "8649"
        }
      }],
      "obj": "java.lang:name=Code Cache,type=MemoryPool",
      "resultAlias": "supervisor",
      "attr": [
        "CollectionUsageThreshold",
        "CollectionUsageThresholdCount",
        "UsageThreshold",
        "UsageThresholdCount"
      ]
    },
    {
      "outputWriters": [{
        "@class": "com.googlecode.jmxtrans.model.output.GangliaWriter",
        "settings": {
          "groupName": "supervisor",
          "host": "IP_OF_GANGLIA_GMOND_SERVER",
          "port": "8649"
        }
      }],
      "obj": "java.lang:type=Runtime",
      "resultAlias": "supervisor",
      "attr": [
        "StartTime",
        "Uptime"
      ]
    }],
    "numQueryThreads" : 2
  }]
}
端口12346是supervisor的jmx服务端口,该端口是在storm.yaml的supervisor.childopts中设置的;
IP_OF_SUPERVISOR_MACHINE是supervisor节点的IP地址
IP_OF_GANGLIA_GMOND_SERVER是ganglia gmond服务所在的服务器的IP
nimbus.json
{
  "servers" : [{
    "port" : "12345",
    "host" : "IP_OF_NIMBUS_MACHINE",
    "queries" : [
    { "outputWriters": [{
        "@class": "com.googlecode.jmxtrans.model.output.GangliaWriter",
        "settings": {
          "groupName": "nimbus",
          "host": "IP_OF_GANGLIA_GMOND_SERVER",
          "port": "8649"
        }
      }],
      "obj": "java.lang:type=Memory",
      "resultAlias": "nimbus",
      "attr": ["ObjectPendingFinalizationCount"]
    },
    {
      "outputWriters": [{
        "@class": "com.googlecode.jmxtrans.model.output.GangliaWriter",
        "settings": {
          "groupName": "nimbus",
          "host": "IP_OF_GANGLIA_GMOND_SERVER",
          "port": "8649"
        }
      }],
      "obj": "java.lang:name=Copy,type=GarbageCollector",
      "resultAlias": "nimbus",
      "attr": [
        "CollectionCount",
        "CollectionTime"
      ]
    },
    {
      "outputWriters": [{
        "@class": "com.googlecode.jmxtrans.model.output.GangliaWriter",
        "settings": {
          "groupName": "nimbus",
          "host": "IP_OF_GANGLIA_GMOND_SERVER",
          "port": "8649"
        }
      }],
      "obj": "java.lang:name=Code Cache,type=MemoryPool",
      "resultAlias": "nimbus",
      "attr": [
        "CollectionUsageThreshold",
        "CollectionUsageThresholdCount",
        "UsageThreshold",
        "UsageThresholdCount"
      ]
    },
    {
      "outputWriters": [{
        "@class": "com.googlecode.jmxtrans.model.output.GangliaWriter",
        "settings": {
          "groupName": "nimbus",
          "host": "IP_OF_GANGLIA_GMOND_SERVER",
          "port": "8649"
        }
      }],
      "obj": "java.lang:type=Runtime",
      "resultAlias": "nimbus",
      "attr": [
        "StartTime",
        "Uptime"
      ]
    }],
    "numQueryThreads" : 2
  } ]
}
端口12345是nimbus的jmx服务端口,该端口是在storm.yaml的nimbus.childopts中设置的;
IP_OF_NIMBUS_MACHINE是nimbus节点的IP地址
IP_OF_GANGLIA_GMOND_SERVER是ganglia gmond服务所在的服务器的IP
4. 将nimbus.json和supervisor.json脚本拷贝到/var/lib/jmxtrans目录下
5. 启动jmxtrans服务
cd /usr/share/jmxtrans
sudo ./jmxtrans start
6. 现在就可以在ganglia的web界面上查看到storm集群的运行状态了
阅读(694) | 评论(0) | 转发(0) |