Chinaunix首页 | 论坛 | 博客
  • 博客访问: 5096341
  • 博文数量: 921
  • 博客积分: 16037
  • 博客等级: 上将
  • 技术积分: 8469
  • 用 户 组: 普通用户
  • 注册时间: 2006-04-05 02:08
文章分类

全部博文(921)

文章存档

2020年(1)

2019年(3)

2018年(3)

2017年(6)

2016年(47)

2015年(72)

2014年(25)

2013年(72)

2012年(125)

2011年(182)

2010年(42)

2009年(14)

2008年(85)

2007年(89)

2006年(155)

分类: Python/Ruby

2015-07-23 14:52:23

开源的运维系统不少,比如nagios、zabbix、cacti等等,但是遇到自己个性化的运维需求的时候,总是显得力不从心!最近在学习python,所以就考虑用python+django+twisted来定做一个完全个性化的运维系统。

运维系统有几个主要的功能:监控、分析、报警、更甚者直接根据分析的结果进行反应操作。而以上几点通过上述的框架可以比较容易的实现。

下面上图说明:


使用freemind整理了下思路:



下面是一些代码段,完整的代码下载见文档底部:

Server:


  1. #!/usr/bin/env python
  2. #coding:utf-8
  3. __author__ = 'dwj'


  4. from twisted.internet.protocol import ServerFactory
  5. from twisted.protocols import basic
  6. import cx_Oracle
  7. from twisted.application import service, internet



  class Mornitor_Protocol(basic.LineReceiver):
      """Line protocol that writes each received metric line into Oracle.

      A line has the form ``time:::tag:::detail_tag:::payload``.  A ``tcp``
      line inserts a new MORNITOR_BASICINFO row; every other accepted tag
      updates the row with the same peer IP and time.
      """

      # Metric tag -> the only detail column it may be paired with.  Column
      # names cannot be passed as bind variables, so both names are checked
      # against this table before being placed into the UPDATE statement.
      _DETAIL_COLUMNS = {
          'cpu': 'cpu_detail',
          'mem': 'mem_detail',
          'disk': 'disk_detail',
          'net': 'net_detail',
          'process_down': 'process_alived',
      }

      def __init__(self):
          # One database connection and cursor per protocol instance.
          _oracle_conn = cx_Oracle.connect('xxxx', 'xxxx', '192.168.7.17/test', threaded=True)
          _oracle_conn.autocommit = True
          self.cur = _oracle_conn.cursor()
          self._oracle_conn = _oracle_conn

      def ruku(self, line):
          """Parse one raw line and store it in the database.

          Lines that do not have four ``:::``-separated fields, carry an
          unknown tag/detail pair, or lack the ``::``-separated value pair
          are reported and ignored instead of raising.
          """
          # Peer address of the client that sent the line.
          ip = self.transport.getPeer().host
          # Split at most three times so a payload containing ':::' survives.
          fields = line.split(':::', 3)
          if len(fields) != 4:
              print('Ignoring malformed line: %r' % (line,))
              return
          timestamp, tag, detail_tag, payload = fields

          if tag == 'tcp':
              # The tcp record creates the row for this (ip, time) pair.
              sql = 'insert into MORNITOR_BASICINFO (ipadd,time,tcp) values (:1,:2,:3)'
              params = [ip, timestamp, payload]
          elif self._DETAIL_COLUMNS.get(tag) == detail_tag:
              # Other records fill in two columns of the existing row.
              values = payload.split('::')
              if len(values) < 2:
                  print('Ignoring malformed line: %r' % (line,))
                  return
              sql = ('update MORNITOR_BASICINFO set %s=:1,%s=:2 '
                     'where ipadd=:3 and time=:4' % (tag, detail_tag))
              params = [values[0], values[1], ip, timestamp]
          else:
              print('Ignoring malformed line: %r' % (line,))
              return

          print(sql)
          # Values travel as bind variables, never as SQL text.
          self.cur.execute(sql, params)

      def connectionMade(self):
          print('Connected!')

      def lineReceived(self, line):
          print(line)
          # Store the line as soon as it has been received.
          self.ruku(line)

      def connectionLost(self, reason='connectionDone'):
          self._oracle_conn.close()
          print('The db is close... ok!')


  class Mornitor_Factory(ServerFactory):
      """Server factory whose protocol class is Mornitor_Protocol."""

      protocol = Mornitor_Protocol

      def __init__(self, service):
          # Nothing else to initialise yet; just remember the given service.
          self.service = service


  class Fish_Service(service.Service):
      """Placeholder service that adds no behaviour of its own."""

      def __init__(self):
          """Set up nothing; the service holds no state of its own."""

      def startService(self):
          # Nothing extra to do: hand over to the stock implementation.
          service.Service.startService(self)

      # stopService is not overridden here, so the inherited one is used.



  # Configuration
  port = 10000
  iface = '127.0.0.1'



  top_server = service.MultiService()  # container for all services

  fish_server = Fish_Service()  # instantiate our own service
  fish_server.setServiceParent(top_server)  # add it to the container

  # Hand the factory the service *instance* (not the Fish_Service class), so
  # factory.service refers to the object that was added to the container.
  factory = Mornitor_Factory(fish_server)

  tcp_server = internet.TCPServer(port, factory, interface=iface)  # TCP listener
  tcp_server.setServiceParent(top_server)  # add the listener to the container

  application = service.Application('Fish_Service')  # name the application
  top_server.setServiceParent(application)  # attach the container to it
Client端


  1. from twisted.protocols import basic
  2. from twisted.internet import protocol, defer, task
  3. import Get_basic_info_2 as Huoqu
  4. import guardian as shouhu
  5. import time
  6. from twisted.application import service, internet


  class Monitor_Protocol(basic.LineReceiver):
      """Client-side line protocol that periodically sends local metrics."""

      def __init__(self):
          # Handle of the periodic sender; created once the connection is up.
          self._loop = None

      @staticmethod
      def huoqu_shuju():
          """Collect the local status lines and return them in a Deferred.

          The collectors are called synchronously; the Deferred returned has
          already fired with a tuple of formatted lines.
          """
          now = str(time.strftime('%Y-%m-%d %H:%M:%S'))

          def add_tag(source, tag1, tag2='none'):
              # Join time, short tag, detail tag and raw data with ':::'.
              return ':::'.join([now, tag1, tag2, source])

          tcp = add_tag(Huoqu.net_tcp(), 'tcp')
          cpu = add_tag(Huoqu.cpu(), 'cpu', 'cpu_detail')
          mem = add_tag(Huoqu.mem(), 'mem', 'mem_detail')
          disk = add_tag(Huoqu.disk_usage(), 'disk', 'disk_detail')
          net = add_tag(Huoqu.net_rate(), 'net', 'net_detail')
          process = add_tag(shouhu.check_alive(), 'process_down', 'process_alived')
          result = (tcp, cpu, mem, disk, net, process, )
          return defer.succeed(result)

      def xunhuan(self, list):
          """Send every item of ``list`` as one line."""
          for i in list:
              self.sendLine(i)

      def fasong(self):
          """Collect the data, then pass it to the sender via a callback."""
          return self.huoqu_shuju().addCallback(self.xunhuan)

      def loop(self, interval=1):
          """Start sending the data every ``interval`` seconds.

          Does nothing when a sender loop is already running, so calling it
          twice never produces duplicate senders.
          """
          if self._loop is not None and self._loop.running:
              return
          self._loop = task.LoopingCall(self.fasong)
          self._loop.start(interval)

      def connectionMade(self):
          # Start the periodic sender once the connection is established.
          print('Connected!......ok!')
          self.loop()

      def connectionLost(self, reason=None):
          # Stop the sender so it does not keep writing to a closed transport
          # and does not pile up when the factory reconnects with a fresh
          # protocol instance.
          if self._loop is not None and self._loop.running:
              self._loop.stop()
          self._loop = None

      def lineReceived(self, line):
          # Must be overridden: the LineReceiver default raises
          # NotImplementedError.  Data from the server is ignored.
          pass


  class Moinitor_client_factory(protocol.ReconnectingClientFactory):
      """Reconnecting client factory whose protocol class is Monitor_Protocol."""

      protocol = Monitor_Protocol

      def __init__(self, service):
          # Nothing else to set up yet; just remember the given service.
          self.service = service


  class Client_Service(service.Service):
      """Placeholder client service that adds no behaviour of its own."""

      def __init__(self):
          """Set up nothing; the service holds no state of its own."""

      def startService(self):
          # Hand over to the stock implementation unchanged.
          service.Service.startService(self)


  # Configuration starts here
  port = 10000
  host = '127.0.0.1'

  # Service wiring
  top_service = service.MultiService() # container for all services

  client_service = Client_Service() # instantiate the service class
  client_service.setServiceParent(top_service) # add our own service to the container

  factory = Moinitor_client_factory(client_service) # build the factory around the service

  tcp_service = internet.TCPClient(host, port, factory) # service that holds the TCP connection
  tcp_service.setServiceParent(top_service) # add the TCP service to the container

  application = service.Application('Fish_Service') # name the application
  top_service.setServiceParent(application) # attach the container to the application

一些自定义监控程序是否存活的脚本:


  # Watched programs: name -> [pid file path, command used to start it].
  # The first element is passed to get_pid(), the second to subprocess.call().
  program = {'nginx': ['/opt/nginx/logs/nginx.pid', '/opt/nginx/sbin/nginx'],
              'rsync-C': ['/var/run/rsyncd.pid', 'rsync --daemon'],
              }


  def main():
      """Report each watched program and start the ones that are not running.

      A tuple returned by get_pid() is taken to mean "not running"; in that
      case the program's start command is run through the shell.
      """
      for name, spec in program.items():
          if not isinstance(get_pid(name, spec[0]), tuple):
              print('The %s is running!' % name)
              continue
          print('%s is not running!' % name)
          print('Start the program by Horland_guardian!')
          subprocess.call(spec[1], shell=True)


  def check_alive():
      """Return ``"<down names>::<alive names>"`` for the watched programs.

      Names inside each half are separated by single spaces.  A tuple
      returned by get_pid() marks the program as down.
      """
      down, alive = [], []
      for name, spec in program.items():
          # get_pid() is called exactly once per program.
          bucket = down if isinstance(get_pid(name, spec[0]), tuple) else alive
          bucket.append(name)
      return '::'.join([' '.join(down), ' '.join(alive)])

django的使用目前只需要使用到admin模块就可以。

下面是一些代码段:

model



  class BasicInfo(models.Model):
      """One monitoring sample: the metrics reported for an IP at a given time."""

      # NOTE(review): IPAddressField is deprecated in newer Django releases;
      # switching to GenericIPAddressField needs a schema change, so it is
      # left as is here.
      ipadd = models.IPAddressField(verbose_name = u'IP地址')
      time = models.CharField(max_length=50, verbose_name = u'时间')
      cpu = models.CharField(max_length=255, blank=True, verbose_name = u'CPU%')
      cpu_detail = models.CharField(max_length=255, blank=True, verbose_name = u'CPU详情')
      mem = models.CharField(max_length=255, blank=True, verbose_name = u'内存%')
      mem_detail = models.CharField(max_length=255, blank=True, verbose_name = u'内存详情')
      disk = models.CharField(max_length=255, blank=True, verbose_name = u'磁盘%')
      disk_detail = models.CharField(max_length=255, blank=True, verbose_name = u'磁盘详情')
      net = models.CharField(max_length=255, blank=True, verbose_name = u'流量 bytes/s')
      net_detail = models.CharField(max_length=1000, blank=True, verbose_name = u'流量详情')
      tcp = models.CharField(max_length=255, blank=True, verbose_name = u'tcp连接状态')
      process_down = models.CharField(max_length=255, blank=True, verbose_name = u'DOWN-进程')
      process_alived = models.CharField(max_length=255, blank=True, verbose_name = u'Process_UP')

      def Process_DOWN(self):
          """Return ``process_down`` wrapped in a red ``<span>`` for the admin.

          The stored text is HTML-escaped first: the result is rendered as
          raw HTML (``allow_tags``), so unescaped text would be interpreted
          as markup.
          """
          # Imported locally so the block does not depend on the file header.
          from django.utils.html import escape
          return '<span style="color: #%s;">%s</span>' % ('ff0000', escape(self.process_down))
      Process_DOWN.allow_tags = True

注册到admin


  class BasicInfo_admin(admin.ModelAdmin):
      """Admin options for BasicInfo: columns to list and the IP filter."""

      list_display = (
          'time',
          'cpu', 'cpu_detail',
          'mem', 'mem_detail',
          'disk', 'disk_detail',
          'net', 'net_detail',
          'tcp',
          'Process_DOWN', 'process_alived',
      )
      list_filter = ('ipadd',)


  admin.site.register(BasicInfo, BasicInfo_admin)

freemind整理的思路中还有一些功能没有实现,目前这个只能算个简单的demo吧,但是基本实现了监控的目的。欢迎大家给我留言!

下面上个django的admin界面截图吧!





代码下载

/>

原文链接

阅读(1567) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~