"""Search newpct1.com for a title and print every matching link.

The module runs unchanged on Python 2 and Python 3: the urllib family is
imported under its Python 2 names, and every ``print`` call takes a single
argument so it means the same thing as a statement or as a function.
"""
import re
import zlib

try:  # Python 3 module layout
    from http import cookiejar as cookielib
    from urllib import request as urllib2
    from urllib.error import HTTPError, URLError
    from urllib.parse import quote_plus, urlencode
except ImportError:  # Python 2 module layout
    import cookielib
    import urllib2
    from urllib import quote_plus, urlencode
    from urllib2 import HTTPError, URLError

USER_AGENT = 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.71 Safari/537.36'


class Browser(object):
    """Minimal HTTP client that keeps a cookie jar between requests.

    Attributes set by ``open``:
        content: raw (already gunzipped) response body of the last
            successful request, as a byte string.
        status: ``200`` after a successful request, the HTTP status code
            after an ``HTTPError``, or the failure reason after any other
            ``URLError``.
    """

    def __init__(self):
        self._cookies = None  # pending url-encoded POST body, sent once
        self.cookies = cookielib.LWPCookieJar()
        self.content = None
        self.status = None

    def create_cookies(self, payload):
        """Store ``payload`` as the form body of the next ``open`` call.

        Args:
            payload: mapping or sequence of pairs accepted by ``urlencode``.
        """
        encoded = urlencode(payload)
        # Python 3 requires the request body to be bytes; on Python 2
        # ``urlencode`` already returns a byte string.
        if not isinstance(encoded, bytes):
            encoded = encoded.encode("ascii")
        self._cookies = encoded

    @staticmethod
    def _decompress(body, encoding):
        """Return ``body`` gunzipped when ``encoding`` is ``"gzip"``."""
        if encoding == "gzip":
            # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
            return zlib.decompressobj(16 + zlib.MAX_WBITS).decompress(body)
        return body

    def open(self, url):
        """Fetch ``url`` and store the body in ``self.content``.

        A payload registered through ``create_cookies`` is sent as the
        request body (turning the request into a POST) and then discarded.

        Args:
            url: address to request.

        Returns:
            ``True`` when the request succeeded, ``False`` when it raised
            ``HTTPError`` or ``URLError`` (details are left in
            ``self.status``).
        """
        if self._cookies is not None:
            req = urllib2.Request(url, self._cookies)
            self._cookies = None
        else:
            req = urllib2.Request(url)
        req.add_header('User-Agent', USER_AGENT)
        req.add_header("Accept-Encoding", "gzip")
        # Route the request through the cookie jar so cookies persist.
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cookies))
        try:
            response = opener.open(req)
            try:
                encoding = response.headers.get("Content-Encoding", "")
                body = response.read()
            finally:
                # Release the connection even if reading the body fails.
                response.close()
        except HTTPError as e:
            # HTTPError subclasses URLError, so it must be handled first
            # or the numeric status code is never recorded.
            self.status = e.code
            return False
        except URLError as e:
            self.status = e.reason
            return False
        self.content = self._decompress(body, encoding)
        self.status = 200
        return True


def find_links(html, link_pattern):
    """Return every capture of ``link_pattern`` found in ``html``.

    Args:
        html: page body as text or as a byte string; ``None`` or empty
            input yields an empty list.
        link_pattern: regular expression (text) with one capture group.

    Returns:
        List of captured strings, in document order.
    """
    if not html:
        return []
    if isinstance(html, bytes) and not isinstance(html, str):
        # Python 3 only: a text pattern cannot scan bytes. latin-1 maps
        # every byte to one code point, so matching is byte-for-byte.
        html = html.decode("latin-1")
    return re.findall(link_pattern, html)


browser = Browser()
texto = "interstellar"
# Quote/escape the term so spaces or regex metacharacters in other search
# terms cannot corrupt the URL or the pattern.
url = "http://www.newpct1.com/index.php?page=buscar&q=%s&ordenar=Nombre&inon=Ascendente" % quote_plus(texto)
pattern = r'<a\shref=[\'"]?([^\'" >]+%s)' % re.escape(texto)


def main():
    """Run the search and print each ``(index, link)`` pair, then the URL."""
    if browser.open(url):
        for item in enumerate(find_links(browser.content, pattern)):
            print(item)
    else:
        print("Request failed: %s" % (browser.status,))
    print(url)


if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question