Chinaunix首页 | 论坛 | 博客
  • 博客访问: 124733
  • 博文数量: 31
  • 博客积分: 691
  • 博客等级: 中士
  • 技术积分: 245
  • 用 户 组: 普通用户
  • 注册时间: 2011-04-16 16:45
文章分类

全部博文(31)

文章存档

2012年(4)

2011年(27)

分类: LINUX

2011-09-22 12:41:13

   前段时间写过一个终端下的词典,最近突发奇想,将其数据的组织方式重新改进。大概的改进有以下几点。

1. 将操作函数化。

2. 将数据进行重组。把相同首字母的单词放在一个文件下。利于以后对查询结果的再利用。

3. 对单词进行标志。将查找次数多的单词优先排序。即排放在文件开始部分。

4. 舍弃对中文的存储。

5. 增加网络发音,而不是单纯的espeak。


source code。

  #!/bin/bash
  # A dictionary for the terminal.
  # Looked-up entries are cached under ~/.tdic_data/, in one file per first
  # character of the query.

  # Prompt shown by the `select` menu that offers spelling suggestions.
  PS3='Your choice is : '

  # All command-line arguments joined by spaces form a single query.
  word="$*"

  # First character of the first argument; it names the cache file.
  # Parameter expansion is character-aware, unlike a byte-oriented `cut`.
  first_alp="${1:0:1}"

  # Per-user cache file (previously hard-coded to one user's home directory).
  cache="${HOME}/.tdic_data/${first_alp}"


  # Pronounce a word, then terminate the script.
  # Arguments: $1 - the word to pronounce
  # Behavior:  tries mplayer first; if mplayer fails, falls back to the
  #            espeak synthesizer at 130 words per minute.
  # Exits:     0 always; this function never returns to its caller.
  read_word()
  {
    local re_word="$1"

    # NOTE(review): the mplayer argument is an empty string in this listing;
    # presumably it was an audio URL built from the word -- confirm and
    # restore it. With an empty argument mplayer fails and espeak is used.
    #
    # Test the command's status directly: the former `[ $? ]` only checked
    # that "$?" is a non-empty string, which is always true, so the espeak
    # fallback could never run.
    if ! mplayer "" >/dev/null 2>&1; then
      espeak -s 130 "$re_word" >/dev/null 2>&1
    fi
    exit 0
  }

  # Ask whether the word should be read aloud and act on the answer.
  # Arguments: $1 - the word to pronounce
  # Input:     one line from stdin; an empty answer defaults to "y"
  # Behavior:  "y" or "Y" calls read_word; any other answer ends the script.
  # Exits:     5 when the user declines
  judge_read()
  {
    local readword="$1"
    local clew="Read it?(y/n)y:"
    local answer=""

    printf '\033[35;1m%s\033[00m' "$clew"
    read -r answer

    # Accept both cases, matching the retry prompt used for suggestions.
    case "${answer:-y}" in
      y|Y) read_word "$readword" ;;
      *)   exit 5 ;;
    esac
  }


  # Look the query up in the local cache; fall back to net_search on a miss.
  # Globals:  word  (read) - the query
  #           cache (read) - cache file; its parent directory is created
  #                          when missing
  # Cache format: blank-line separated entries whose first line is
  #           "<hit count> <word>", followed by the definition lines.
  # On a hit: prints the entry, increments its hit count, re-sorts the cache
  #           in the background and offers to read the word aloud.
  # Exits:    always (0 unless a callee exits with another status);
  #           1 when the cache directory cannot be created.
  local_search()
  {
    local cache_dir="${cache%/*}"
    local entry header count shown tmp

    # Create the cache directory once and give up on failure. The earlier
    # "mkdir, then call yourself again" recursed without end when mkdir
    # failed.
    if [ ! -d "$cache_dir" ]; then
      if ! mkdir -p -- "$cache_dir"; then
        printf 'Cannot create cache directory: %s\n' "$cache_dir" >&2
        exit 1
      fi
    fi

    if [ ! -e "$cache" ]; then
      net_search
      exit 0
    fi

    # Paragraph mode (RS="") yields one entry per record. The word is
    # compared as a plain string, so regex metacharacters in the query are
    # harmless and "hello" no longer matches the entry "hello world".
    entry=$(TDIC_WORD="$word" awk '
      BEGIN { RS = ""; FS = "\n"; target = ENVIRON["TDIC_WORD"] }
      match($1, /^[0-9]+ /) && substr($1, RLENGTH + 1) == target { print; exit }
    ' "$cache")

    if [ -z "$entry" ]; then
      net_search
      exit 0
    fi

    # Strip the hit count from the header line only; definition lines that
    # start with digits are shown unchanged.
    header=${entry%%$'\n'*}
    count=${header%% *}
    shown=${entry#"$count "}
    printf '\033[34;1m%s\033[0m\n' "$shown"
    echo

    # Increment the hit count of the matching header (best effort). The
    # result is written to a temporary file and moved into place so a
    # failure cannot leave a truncated cache behind.
    if tmp=$(mktemp "${cache}.XXXXXX"); then
      if TDIC_WORD="$word" awk '
        BEGIN { target = ENVIRON["TDIC_WORD"]; at_start = 1 }
        at_start && !bumped && match($0, /^[0-9]+ /) && substr($0, RLENGTH + 1) == target {
          $0 = ($0 + 1) " " target
          bumped = 1
        }
        { print; at_start = ($0 == "") }
      ' "$cache" > "$tmp"; then
        mv -- "$tmp" "$cache"
      else
        rm -f -- "$tmp"
      fi
    fi

    sort_file &
    judge_read "$word"
    exit 0
  }

  # Look the query up online and print the result.
  # Globals:  word  (read) - the query
  #           cache (read) - cache file that new entries are appended to
  # Behavior: if the response contains "sugg" tags, the suggestions are
  #           offered in a `select` menu and the chosen one is looked up via
  #           the `tdic` command. Otherwise the definition is printed,
  #           cached (only for queries ending in an ASCII letter), a warning
  #           is shown when the cache directory exceeds 100 MiB, and the
  #           user is asked whether the word should be read aloud.
  # Exits:    always. 2 after a suggestion lookup, 3 when the user declines
  #           to retry, 4 when no definition was retrieved, otherwise
  #           whatever judge_read exits with (0 if the menu ends on EOF).
  net_search()
  {
    local wordf sword guessw choice word1 total

    # Fetch the page snapshot.
    # NOTE(review): the URL is an empty string in this listing; presumably
    # it was a dictionary web-service URL containing the query -- confirm
    # and restore it.
    wordf=$(wget -q "" -O -)

    # No exact match: the service answers with suggestions instead.
    if grep -q 'sugg' <<< "$wordf"; then
      echo "You may want to search these words."
      sword=$(printf '%s\n' "$wordf" \
        | sed -e 's/<[\/]*sugg>//g' \
        | grep '^[^<]' \
        | tr '\n' '\t')
      # Word splitting is intended here: one menu item per suggestion.
      # shellcheck disable=SC2086
      select guessw in $sword; do
        if [ -n "$guessw" ]; then
          printf '\033[36;1m%s\033[0m\n' "$guessw"
          # NOTE(review): `tdic` is assumed to be the name this script is
          # installed under -- confirm.
          tdic "$guessw"
          exit 2
        else
          read -r -n 1 -p "Do you want to try again:(y/n)y:" choice
          : "${choice:=y}"
          echo
          if [ "$choice" = "y" ] || [ "$choice" = "Y" ]; then
            echo 'Please input again:'
          else
            exit 3
          fi
        fi
      done
    else
      # Drop the structural tags, mark emphasised text with parentheses and
      # blank out any remaining angle brackets.
      word1=$(printf '%s\n' "$wordf" \
        | sed -e 's/<[\/]*\(def\|sent\|orig\|trans\)>//g' \
              -e 's/<em>\(.*\)<\/em>/( \1 )/g' \
              -e 's/\(>\|<\)/ /g' \
        | grep '^[^<]')

      # A failed or empty download must not be cached: a blank entry would
      # shadow the word on every later lookup.
      if [ -z "$word1" ]; then
        printf 'No definition found for: %s\n' "$word" >&2
        exit 4
      fi

      printf '\033[32;1m%s\033[0m\n' "$word1"

      # Only queries that end in an ASCII letter are stored.
      if [[ "$word" =~ [a-zA-Z]$ ]]; then
        printf '1 %s\n%s\n\n' "$word" "$word1" >> "$cache"
      fi
      echo

      # Warn when the cache directory exceeds 100 MiB (102400 KiB). This has
      # to happen before judge_read, which ends the script.
      total=$(du -sk -- "${cache%/*}" 2>/dev/null | cut -f 1)
      if [ "${total:-0}" -gt 102400 ]; then
        echo
        printf '\t%s\n' "The dictionary cache is beyond 100M.Maybe you can release some space."
      fi

      judge_read "$word"
    fi
    exit 0
  }

  # Reorder the cache file so that entries with the highest hit count come
  # first; entries with equal counts keep their relative order.
  # Globals:  cache (read) - cache file, rewritten in place
  # Returns:  0 on success or when there is nothing to sort,
  #           1 when the temporary file cannot be created or sorting fails
  #           (the cache is left untouched in that case).
  sort_file()
  {
    local tmp

    # Nothing to do for a missing or empty cache.
    [ -s "$cache" ] || return 0

    # Temporary file next to the cache so the final mv stays on one
    # filesystem.
    tmp=$(mktemp "${cache}.XXXXXX") || return 1

    # Paragraph mode (RS="") reads one blank-line separated entry per
    # record, so definition lines that start with digits are never mistaken
    # for headers. The first field of a record is its hit count. The file is
    # read directly: the earlier loop read from stdin, which is /dev/null
    # for a background job, and then deleted the cache.
    if awk '
      BEGIN { RS = ""; ORS = "\n\n" }
      { entry[NR] = $0; hits[NR] = $1 + 0 }
      END {
        # Selection sort: repeatedly emit the not-yet-placed entry with the
        # highest count; the strict ">" keeps ties in file order.
        for (i = 1; i <= NR; i++) {
          best = 0
          for (j = 1; j <= NR; j++) {
            if (!(j in placed) && (best == 0 || hits[j] > hits[best])) {
              best = j
            }
          }
          placed[best] = 1
          print entry[best]
        }
      }
    ' "$cache" > "$tmp"; then
      mv -- "$tmp" "$cache"
    else
      rm -f -- "$tmp"
      return 1
    fi
  }

  # Entry point: at least one argument (the word to look up) is required.
  if [ $# -lt 1 ]; then
    # Usage goes to stderr so it does not mix with lookup output.
    {
      echo
      echo "usage: $(basename "$0") [word]"
      echo
    } >&2
    exit 1
  fi

  # Search the local cache first; local_search itself falls back to the
  # online lookup on a miss.
  local_search

  exit 0

阅读(2206) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~