Chinaunix首页 | 论坛 | 博客
  • 博客访问: 123817
  • 博文数量: 31
  • 博客积分: 691
  • 博客等级: 中士
  • 技术积分: 245
  • 用 户 组: 普通用户
  • 注册时间: 2011-04-16 16:45
文章分类

全部博文(31)

文章存档

2012年(4)

2011年(27)

分类: LINUX

2012-03-12 16:46:05

    好久没有更新blog了,一方面因为还在学习Python,另一方面又在看Linux的网络内核,真心忙不过来。学习Python有一段时间了,感觉它的内容真的很多啊,还得继续努力。以前用bash写过一个终端下的在线词典,后来学了Python,就一直想用Python把它重构一下。下面是源码,基本思路都一样,只是还没有想到一种好的数据结构来存储历史内容。其实有想过用pickle,但是每次都得将其全部导入内存,就像fcitx的词库一样。PS:我重新编译过fcitx的词库,导致其占用内存很大。唉,任何事情都有两面,每一种所谓的最佳解决办法,其实就是一种折中罢了。所以,我就直接使用了原来扒取的xml,这样再次在本地读取的时候就省了很多事。

      对了,还有一点:本来有个比较好的想法,就是像OS并行查找寄存器和内存那样,同时查找本地历史记录和网上结果,谁先给出结果就kill掉另一个。但是threading只提供创建线程的功能,而且它会忽略函数的返回值。我想过用一个全局变量作为标志,但是用锁的时候,其他申请锁的线程都阻塞了,这样还是没有达到我想要的效果。如果你有好的思路,希望能分享一下。愿与君共勉之。

点击(此处)折叠或打开

  1. #!/usr/bin/python
  2. #encoding=utf-8
  3. #===============================================================================
  4. #
  5. # FILE: pdic
  6. #
  7. # USAGE: pdic [word]
  8. #
  9. # DESCRIPTION: The dictionary based on Terminal
  10. #
  11. # OPTIONS: ---
  12. # REQUIREMENTS: ---
  13. # BUGS: ---
  14. # NOTES: ---
  15. # AUTHOR: Eric Yu , linuxer.yu@gmail.com
  16. # LICENCE: GPL
  17. # CREATED:
  18. # REVISION: ---
  19. #===============================================================================

  20. from xml.etree import ElementTree
  21. import threading
  22. import re
  23. import os
  24. import sys
  25. import urllib2

  26. '''The word you want to search\
  27.         and the place thar stored the word directory'''
  28. word = ''
  29. word_dir = r'word_dir'

  30. #content = urllib2.urlopen(r'%s'% word)

  31. #mean = content.read()

  32. #file = open('file','w+')
  33. #file.write(mean)
  34. #file.close()

  35. '''Use to replace , '''
  36. parleft = re.compile(r'')
  37. parright = re.compile(r'')

  38. def print_node(node):
  39.     '''Show the meaning of the word\
  40.             and replace , '''
  41.     string = re.sub(parleft,'<',node.text)
  42.     text = re.sub(parright,'>',string)
  43.     print '%s'% text


  44. #read_xml(mean)

  45. def Judge():
  46.     '''判断是否存在字典文件夹,不存在建立'''
  47.     global word_dir
  48.     if os.path.exists(word_dir):
  49.         if os.path.isdir(word_dir):
  50.             pass
  51.         else:
  52.             print "There is file named %s"% word_dir
  53.             return 0
  54.     else:
  55.         try:
  56.             os.mkdir(word_dir)
  57.         except:
  58.             print "Fail to create the directory"
  59.             return 0
  60.         else:
  61.             print "First Use:Created the directory"
  62.             return 1

  63. def Usage():
  64.     '''判断输入是否正确'''
  65.     if len(sys.argv) != 2:
  66.         print "Usage: %s [word]"% sys.argv[0]
  67.         return 0
  68.     else:
  69.         global word
  70.         word = sys.argv[1]

  71. class Word:
  72.     def __init__(self,word):
  73.         self.word = word
  74.         self.url = r'%s'% self.word

  75.     def GetUrl(self):
  76.         global word_dir
  77.         file = word_dir + '/' + self.word + '.xml'
  78.         try:
  79.             content = urllib2.urlopen(self.url)
  80.         except:
  81.             print "Fail to download"
  82.             return 0
  83.         self.mean = content.read()
  84.         if re.search(r'|Not Found',self.mean):
  85.             return 1
  86.         else:
  87.             wf = open(file,'w')
  88.             wf.write(self.mean)
  89.             wf.close()
  90.             if self.mean == '':
  91.                 print "Fail to download"
  92.                 return 0

  93.     def Read_xml(self):
  94.         root = ElementTree.fromstring(self.mean)
  95.         '''root.find() 返回的为None。'''
  96.         node_find = root.getiterator('def')
  97.         if not node_find:
  98.             pass
  99.         else:
  100.             for i in node_find:
  101.                 print_node(i)

  102.         lst_node = root.getiterator("sent")
  103.         if not lst_node:
  104.             pass
  105.         else:
  106.             for i in lst_node:
  107.                 child = i.getchildren()
  108.                 for j in child:
  109.                     lst_node_child = j
  110.                     print_node(lst_node_child)

  111.         node_sugg = root.getiterator('sugg')
  112.         if not node_sugg:
  113.             pass
  114.         else:
  115.             for i in node_sugg:
  116.                 print_node(i)

  117.     def local_xml(self):
  118.         '''Read the word file local'''
  119.         global word_dir
  120.         file = word_dir + '/' + self.word + '.xml'
  121.         if os.path.exists(file) and os.path.isfile(file):
  122.             wf = open(file)
  123.             self.mean = wf.read()
  124.             if not len(self.mean):
  125.                 return 0
  126.             wf.close()
  127.             return 1
  128.         return 0

  129. def main():
  130.     if Usage() == 0:
  131.         sys.exit()
  132.     if Judge() == 0:
  133.         sys.exit()
  134.     tran = Word(word)
  135.     if tran.local_xml() == 0:
  136.         if tran.GetUrl() == 0:
  137.             print 'Error'
  138.             sys.exit()
  139.         else:
  140.             tran.Read_xml()
  141.     else:
  142.         tran.Read_xml()
  143.     return 1


  144. if __name__ == '__main__':
  145.     main()


阅读(2079) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~