Chinaunix首页 | 论坛 | 博客
  • 博客访问: 481947
  • 博文数量: 59
  • 博客积分: 345
  • 博客等级: 二等列兵
  • 技术积分: 1380
  • 用 户 组: 普通用户
  • 注册时间: 2011-06-18 22:44
个人简介

to be myself

文章分类

全部博文(59)

文章存档

2017年(5)

2013年(47)

2012年(3)

2011年(4)

分类: Python/Ruby

2017-09-21 10:09:19

最新代码位置


点击(此处)折叠或打开

  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # @Author: Angrad

  4. import urllib.request
  5. import re
  6. import os

  7. resource_base='https://packages.ubuntu.com'
  8. search_base=resource_base+'/search?keywords='
  9. keywords='sendmail'
  10. search_suffix='&searchon=names&suite=zesty&section=all'
  11. search_url = search_base+keywords+search_suffix

  12. allres=[]
  13. all_res_list=[]
  14. #arch=amd64
  15. arch='i386'

  16. def Init():
  17.     global search_url
  18.     search_url = search_base+keywords+search_suffix

  19. def GetDoc(url):
  20.     print("url: "+url)
  21.     con = urllib.request.urlopen(url)
  22.     doc = con.read()
  23.     con.close()
  24.     doc = doc.decode('utf-8')
  25.     return doc

  26. def FullUrl(url):
  27.     return resource_base+url

  28. def ResAlreadyExists(res):
  29.     list = res.split('/')
  30.     if list[-1] not in allres:
  31.         allres.append(list[-1])
  32.         return False
  33.     return True

  34. def SaveFile():
  35.     file_object = open(keywords+'_res_url.txt', 'w')
  36.     file_object.writelines(all_res_list)
  37.     file_object.close( )

  38. def Download():
  39.     print("start to download all res")
  40.     dir = keywords + "_download"
  41.     if not os.path.exists(dir):
  42.         os.makedirs(dir)
  43.     for res in all_res_list:
  44.         res = res.strip("\n")
  45.         name = res.split('/')
  46.         print("download "+dir+"\\"+name[-1])
  47.         urllib.request.urlretrieve(res, dir+"\\"+name[-1])

  48. def GetArchUrl(res_str_list):
  49.     index = 0
  50.     
  51.     #ARCH all
  52.     if len(res_str_list) == 1:
  53.         return index

  54.     for res in res_str_list:
  55.         list = res.split('/')
  56.         #list[2] amd64
  57.         #['', 'zesty', 'arm64', 'libc6', 'download']
  58.         if list[2] == arch:
  59.             return index
  60.         index = index+1
  61.     return index

  62. def GetRes(url):
  63.     #self_res_str[0]
  64.     doc_orig = GetDoc(url)
  65.     self_res_pattern=r'<th><a href=\"([a-zA-Z0-9\.\/\-]+)\"'
  66.     self_res_str = re.findall(self_res_pattern, doc_orig)
  67.     index = GetArchUrl(self_res_str)
  68.     self_res_str = FullUrl(self_res_str[index])
  69.     #self res:https://packages.ubuntu.com/zesty/all/sendmail/download
  70.     print("self res:"+self_res_str)
  71.     print("\n")
  72.     self_deb_pattern=r'<li><a href=\"([a-zA-Z0-9\.\/\-:_+]+)\"'
  73.     doc = GetDoc(self_res_str)
  74.     self_deb_str = re.findall(self_deb_pattern, doc)
  75.     all_res_list.append(self_deb_str[0]+"\n")
  76.     #deb url:http://mirrors.kernel.org/ubuntu/pool/universe/s/sendmail/sendmail_8.15.2-8ubuntu1_all.deb
  77.     print("deb url:"+self_deb_str[0])
  78.     print("\n")
  79.     #dep_res_str[0][1]
  80.     dep_res_pattern=r'(?<=(dep:</span>))[\s]*<a href=\"([a-zA-Z0-9\.\/\-]+)\"'
  81.     dep_res_str = re.findall(dep_res_pattern, doc_orig)
  82.     for r in dep_res_str:
  83.         if ResAlreadyExists(r[1]):
  84.             return
  85.         dep_url = FullUrl(r[1])
  86.         #dep res:https://packages.ubuntu.com/zesty/sendmail-base
  87.         print("dep res:"+dep_url)
  88.         GetRes(dep_url)
  89.     print("\n")
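  90. keywords = input("Input key word: ")
  91. Init()
  92. #search_url: https://packages.ubuntu.com/search?keywords=sendmail&searchon=names&suite=zesty&section=all
  93. doc = GetDoc(search_url)
  94. #first_page_str[0]
  95. first_page_pattern = r'<a class=\"resultlink\" href=\"([a-zA-Z0-9\.\/\-]+)\"'  # reconstructed: the original literal was lost in extraction; verify against the search-result HTML
  96. first_page_str = re.findall(first_page_pattern, doc)
  97. if len(first_page_str) < 1:
  98.     print("no such res: " + keywords)
  99.     os._exit(0)
  100. first_page_str = FullUrl(first_page_str[0])
  101. #first page:https://packages.ubuntu.com/zesty/sendmail
  102. print("first page:"+first_page_str)
  103. print("\n")
  104. GetRes(first_page_str)
  105. SaveFile()
  106. Download()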
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Author: Angrad

import urllib.request
import re
import os

# Root of the Ubuntu package index. Hrefs scraped from its pages are
# site-relative, so they are joined onto this base before being fetched.
# (The base URL had been stripped to '' when the post was published, which
# made every request fail with "unknown url type".)
resource_base = 'https://packages.ubuntu.com'
search_base = resource_base + '/search?keywords='
# Default package name; the script entry point replaces it with user input.
keywords = 'sendmail'
# Query-string tail: search on names, suite "zesty", all sections.
# The parameter is "&section" -- "&sect" had been rendered as the "§" entity.
search_suffix = '&searchon=names&suite=zesty&section=all'
search_url = search_base + keywords + search_suffix

# Last path segment of every dependency page already visited.
allres = []
# Collected .deb URLs, each terminated by "\n" so writelines() emits one per line.
all_res_list = []
# Architecture compared against the third path field of each download link;
# 'amd64' is the other value the author used.
arch = 'i386'

def Init():
    """Rebuild the module-level ``search_url`` from the current ``keywords``.

    Must be called after ``keywords`` is changed, because ``search_url`` is
    otherwise only computed once when the module is loaded.
    """
    global search_url
    search_url = "".join((search_base, keywords, search_suffix))

def GetDoc(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    The URL is echoed to stdout before the request is made.

    Raises whatever ``urllib.request.urlopen`` raises for an unreachable or
    malformed URL, and ``UnicodeDecodeError`` if the body is not valid UTF-8.
    """
    print("url: "+url)
    # The context manager closes the connection even when read() raises;
    # the explicit close() it replaces was skipped on any read error.
    with urllib.request.urlopen(url) as con:
        raw = con.read()
    return raw.decode('utf-8')

def FullUrl(url):
    """Return *url* with the module-level ``resource_base`` prepended.

    Plain string concatenation: no separator is inserted and *url* is not
    checked for already being absolute.
    """
    absolute = resource_base + url
    return absolute

def ResAlreadyExists(res):
    """Report whether *res* was seen before, recording it if it was not.

    Only the text after the final '/' of *res* is used as the key, and the
    keys are kept in the module-level ``allres`` list.

    Returns True when the key is already in ``allres``; otherwise appends it
    and returns False.
    """
    leaf = res.split('/')[-1]
    if leaf in allres:
        return True
    allres.append(leaf)
    return False

def SaveFile():
    """Write every entry of ``all_res_list`` to '<keywords>_res_url.txt'.

    The file is created in the current working directory and overwritten if
    it already exists. Entries are written verbatim; they already carry
    their own trailing newline.
    """
    # 'with' guarantees the handle is closed (and data flushed) even if
    # writelines() raises, which the explicit close() did not.
    with open(keywords+'_res_url.txt', 'w') as file_object:
        file_object.writelines(all_res_list)

def Download():
    """Download every URL in ``all_res_list`` into '<keywords>_download'.

    The directory is created in the current working directory when missing.
    Each file is stored under the last path segment of its URL, and progress
    is echoed to stdout. Errors from ``urlretrieve`` propagate to the caller.
    """
    print("start to download all res")
    target_dir = keywords + "_download"
    # exist_ok replaces the separate exists() check, which could race with
    # another process creating the directory in between.
    os.makedirs(target_dir, exist_ok=True)
    for res in all_res_list:
        res = res.strip("\n")
        # os.path.join uses the platform separator. The hard-coded "\\" it
        # replaces only produced a path inside target_dir on Windows; on
        # other systems it created '<dir>\<name>' files next to the directory.
        dest = os.path.join(target_dir, res.split('/')[-1])
        print("download  "+dest)
        urllib.request.urlretrieve(res, dest)

def GetArchUrl(res_str_list):
    """Return the position in *res_str_list* of the link for the wanted ``arch``.

    Each entry is expected to look like '/zesty/arm64/libc6/download', i.e.
    splitting on '/' gives ['', 'zesty', 'arm64', 'libc6', 'download'] and
    field 2 is the architecture.

    Returns 0 when the list has exactly one entry (presumably the
    architecture-independent "all" build), otherwise the index of the first
    entry whose architecture field equals the module-level ``arch``.
    When nothing matches, the result equals ``len(res_str_list)``, which is
    NOT a valid index -- callers must check before subscripting.

    Raises IndexError if an inspected entry has fewer than three '/'-fields.
    """
    if len(res_str_list) == 1:
        return 0

    for position, link in enumerate(res_str_list):
        if link.split('/')[2] == arch:
            return position
    return len(res_str_list)

def GetRes(url):
    """Record the .deb URL for the package page at *url*, then walk its dependencies.

    Steps:
      1. Fetch the package page and collect its per-architecture download
         links; pick one with ``GetArchUrl``.
      2. Fetch that download page and append the first .deb link found to
         the module-level ``all_res_list`` (newline-terminated).
      3. For every "dep:" link on the package page that has not been seen
         yet (``ResAlreadyExists``), recurse.

    The three regex literals had been truncated when the post was published;
    they are restored from the author's own correction in the page comments.
    Progress is echoed to stdout. Network errors from ``GetDoc`` propagate.
    """
    doc_orig = GetDoc(url)

    # Links to the download pages, e.g. /zesty/all/sendmail/download
    self_res_pattern = r'<th><a href=\"([a-zA-Z0-9\.\/\-]+)\"'
    self_res_str = re.findall(self_res_pattern, doc_orig)
    index = GetArchUrl(self_res_str)

    # GetArchUrl returns len(list) when no architecture matches (and 0 for an
    # empty list), so the index has to be range-checked before use.
    if index < len(self_res_str):
        self_res_str = FullUrl(self_res_str[index])
        #self res:https://packages.ubuntu.com/zesty/all/sendmail/download
        print("self res:"+self_res_str)
        print("\n")

        # Mirror links on the download page; the first one is used, e.g.
        # http://mirrors.kernel.org/ubuntu/pool/universe/s/sendmail/sendmail_8.15.2-8ubuntu1_all.deb
        self_deb_pattern = r'<li><a href=\"([a-zA-Z0-9\.\/\-:_+]+)\"'
        doc = GetDoc(self_res_str)
        self_deb_str = re.findall(self_deb_pattern, doc)
        if self_deb_str:
            all_res_list.append(self_deb_str[0]+"\n")
            print("deb url:"+self_deb_str[0])
        else:
            print("no deb url on: "+self_res_str)
        print("\n")
    else:
        print("no download page for arch "+arch+": "+url)
        print("\n")

    # Two groups => findall yields (marker, href) tuples; r[1] is the href,
    # e.g. /zesty/sendmail-base
    dep_res_pattern = r'(?<=(dep:</span>))[\s]*<a href=\"([a-zA-Z0-9\.\/\-]+)\"'
    dep_res_str = re.findall(dep_res_pattern, doc_orig)
    for r in dep_res_str:
        if ResAlreadyExists(r[1]):
            # Skip only this dependency. Returning here (as before) silently
            # dropped every dependency listed after an already-seen one.
            continue
        dep_url = FullUrl(r[1])
        #dep res:https://packages.ubuntu.com/zesty/sendmail-base
        print("dep res:"+dep_url)
        GetRes(dep_url)
    print("\n")


# Script entry point: ask for a package name, resolve its page through the
# site search, collect all .deb URLs, save the list and download the files.
if __name__ == "__main__":
    keywords = input("Input key word: ")
    Init()
    #search_url: https://packages.ubuntu.com/search?keywords=sendmail&searchon=names&suite=zesty&section=all
    doc = GetDoc(search_url)

    # NOTE(review): the original literal was lost when the page was published
    # and the author's correction does not cover it. This is a reconstruction
    # that assumes result links look like
    # <a class="resultlink" href="/zesty/sendmail"> -- confirm against the
    # live search-result HTML before relying on it.
    first_page_pattern = r'<a class=\"resultlink\" href=\"([a-zA-Z0-9\.\/\-]+)\"'
    first_page_str = re.findall(first_page_pattern, doc)

    if len(first_page_str) < 1:
        print("no such res: " + keywords)
        # SystemExit lets the interpreter flush stdout on the way out;
        # os._exit() could discard the message above when output is piped.
        raise SystemExit(0)

    first_page_str = FullUrl(first_page_str[0])
    #first page:https://packages.ubuntu.com/zesty/sendmail
    print("first page:"+first_page_str)
    print("\n")

    GetRes(first_page_str)
    SaveFile()
    Download()

  • 阅读(2011) | 评论(2) | 转发(0) |
    0

    上一篇:AStyle 选项

    下一篇:没有了

    给主人留下些什么吧!~~

    angrad2017-09-21 11:22:01

    angrad:Line 59:  self_res_pattern=r'<th><a href=\"([a-zA-Z0-9\.\/\-]+)\"'
     Line 66:  self_deb_pattern=r'<li><a href=\"([a-zA-Z0-9\.\/\-:_+]+)\"'
     Line 74:  dep_res_pattern=r'(?<=(dep:</span>))[\s]*<a href=\"([a-zA-Z0-9\.\/\-]+)\"'

    确保上面三个pattern是这些字符

    回复 | 举报

    angrad2017-09-21 11:21:45

    Line 59:  self_res_pattern=r'<th><a href=\"([a-zA-Z0-9\.\/\-]+)\"'
     Line 66:  self_deb_pattern=r'<li><a href=\"([a-zA-Z0-9\.\/\-:_+]+)\"'
     Line 74:  dep_res_pattern=r'(?<=(dep:</span>))[\s]*<a href=\"([a-zA-Z0-9\.\/\-]+)\"'