-
# encoding:utf-8
-
import sys
-
import re
-
import pymssql
-
from urllib2 import Request, urlopen, URLError, HTTPError
-
-
def get_packet(url):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: Address handed directly to ``urlopen``.

    Returns:
        Whatever ``read()`` on the opened response yields.

    Any exception raised by ``urlopen`` or ``read`` propagates to the caller.
    """
    response = urlopen(url)
    try:
        return response.read()
    finally:
        # Release the underlying connection even when read() fails.
        response.close()
-
def get_data(packet):
    """Parse ``packet`` and publish the results in module-level globals.

    Rebinds two globals instead of returning a value:

    * ``items``  -- every capture of the first pattern.
    * ``items2`` -- captures of the second pattern, minus any entry that
      contains an ``&`` character.

    Args:
        packet: Text to search with the two regular expressions.
    """
    global items
    global items2

    # NOTE(review): both patterns look like they lost their surrounding HTML
    # tag delimiters when this file was extracted -- confirm against the
    # original script before trusting the matches. Values are kept as found.
    #
    # re.findall always returns a list (never None), so the results can be
    # assigned unconditionally.
    items = re.findall(r'(.*)', packet)

    # The second pattern begins with a newline character; it is written with
    # an escape because a quoted literal cannot span two source lines.
    cells = re.findall('\n(.*) | ', packet)
    items2 = [x for x in cells if '&' not in x]
-
def save_data():
    """Insert one row per entry of ``items`` into ``assets_and_liabilities``.

    Reads the module-level globals ``ID``, ``items`` and ``items2``. For the
    i-th entry of ``items`` the inserted row is
    ``(ID, items2[0], items[i], items2[2 * i + 2])``.

    All inserts are committed together after the loop; if any statement
    raises, nothing is committed and the connection is still closed.

    Raises:
        IndexError: if ``items2`` is shorter than the indexing above needs.
        Any error raised by ``pymssql`` propagates unchanged.
    """
    # NOTE(review): server name and credentials are hard-coded here --
    # consider moving them to configuration.
    # Raw string: "\S" is not a valid escape sequence; the value is unchanged.
    server = r"A853F747DDE4458\SQLEXPRESS"
    user = "sa"
    password = "123456"

    sql = "INSERT INTO assets_and_liabilities VALUES (%d, %s, %s, %s)"

    conn = pymssql.connect(server, user, password, "stock")
    try:
        cursor = conn.cursor()
        for i, item in enumerate(items):
            # Same index the original computed as i + j, with j starting at 2
            # and growing by one per iteration: 2, 4, 6, ...
            cursor.execute(sql, (ID, items2[0], item, items2[2 * i + 2]))
        # commit() is required to persist the rows unless autocommit is on.
        conn.commit()
    finally:
        conn.close()
-
if __name__=='__main__':
-
url = 'http://stock.finance.qq.com/corp1/cbsheet.php?zqdm=600787&type=2014'
-
packet = get_packet(url)
-
items = []
-
items2 = []
-
ID = 600787
-
if packet =='~
此次代码更新了以下内容:
1、对抓取到的数据进行处理,用语句
-
if tmp2 is not None :
-
items2 = [x for x in tmp2 if '&' not in x]
对包含“&”字符的数据作丢弃处理,新生成一个列表赋值给items2。
2、增加写入数据库的语句,把数据批量插入数据库表中,通过导入pymssql库,即可用python操作mssql数据库。
3、把items和items2修正为全局变量:在函数中加上global items(以及global items2)即可。否则在get_data函数中给items和items2赋值后,运行到save_data函数时,items和items2仍然是初始值,get_data中的赋值不会起作用。(此问题困扰了好久,真失败。)
阅读(1617) | 评论(0) | 转发(0) |