"""Scrape the Fallsview Casino Resort entertainment listings into a JSON file.

The script downloads the listings page, finds every ``div`` whose class is
``"list-item cell"`` and records one entry per cell with its link, image URL
and name.  All entries are written to ``listings.json`` as a JSON list.

NOTE(review): ``urllib.urlopen`` and the ``BeautifulSoup`` module name are
Python 2 era APIs, while ``"html.parser"`` / ``get_text()`` look like the
bs4 interface -- confirm which BeautifulSoup release this is meant to run on.
"""
import re
import json
import urllib
import time
from datetime import datetime

from BeautifulSoup import BeautifulSoup

# Image ``src`` values are appended to the bare site root.
SITE_ROOT = "http://www.fallsviewcasinoresort.com"
# Listing ``href`` values are appended to this prefix to build each link.
BASE_URL = "http://www.fallsviewcasinoresort.com/entertainment"
LISTINGS_URL = "http://www.fallsviewcasinoresort.com/entertainment/listings"
OUTPUT_PATH = "listings.json"


def _ascii_only(text):
    """Return *text* with every non-ASCII character dropped."""
    # Round-trip through bytes so the result is text on both Python 2 and 3
    # and serializes to the same JSON the plain ``encode`` call produced.
    return text.encode("ascii", "ignore").decode("ascii")


def _last_attribute(tags, attribute, prefix):
    """Return *prefix* plus *attribute* of the last tag in *tags*.

    Returns ``None`` when *tags* is empty or the last tag lacks the
    attribute, instead of raising or reusing a value from an earlier cell.
    """
    if not tags:
        return None
    value = tags[-1].get(attribute)
    if value is None:
        return None
    return prefix + _ascii_only(value)


def parse_listings(soup):
    """Return a list with one dict per ``list-item cell`` div in *soup*.

    Each dict has the keys ``listing_link``, ``listing_img`` and
    ``listing_name``.  When a cell holds several matching tags the last one
    wins; a field whose tag is missing is stored as ``None``.
    """
    listings = []
    for cell in soup.findAll("div", {"class": "list-item cell"}):
        headings = cell.findAll("h3")
        name = _ascii_only(headings[-1].get_text()) if headings else None
        # A fresh dict per cell: reusing one shared dict kept only the
        # final listing in the output file.
        listings.append({
            "listing_link": _last_attribute(cell.findAll("a"), "href", BASE_URL),
            "listing_img": _last_attribute(cell.findAll("img"), "src", SITE_ROOT),
            "listing_name": name,
        })
    return listings


def main():
    """Fetch the listings page and dump every parsed listing to disk."""
    response = urllib.urlopen(LISTINGS_URL)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    soup = BeautifulSoup(html, "html.parser")
    with open(OUTPUT_PATH, "w") as outfile:
        json.dump(parse_listings(soup), outfile)


if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question