#!/usr/bin/python
"""Scrape a WMATA station PIDS page and clean up the arrival-minutes column.

The page at ``PID_URL`` is fetched, its first HTML table is located, and each
data row's car count, destination and minutes cells are read.  The minutes
cell is passed through :func:`clean_minutes` because it is not always a
number (the original author notes a "BRD" value that errors out).

Written to run under both Python 2 and Python 3.
"""
import re  # regex matching

try:  # Python 2
    from urllib2 import urlopen
except ImportError:  # Python 3
    from urllib.request import urlopen

PID_URL = 'http://wmata.com/rider_tools/pids/showpid.cfm?station_id=43'

# Optional surrounding whitespace around a run of digits, and nothing else.
# Compiled once at import time instead of on every call.
_MINUTES_RE = re.compile(r'\s*(\d+)\s*$')


def clean_minutes(x):
    """Return the minute count in *x* as an int, or 0 if *x* is not numeric.

    Args:
        x: Text of the minutes cell.  May carry leading/trailing whitespace
            (tabs, newlines, ...), may be a non-numeric marker such as
            ``"BRD"``, or may be ``None`` when the cell has no single string.

    Returns:
        The parsed number of minutes, or ``0`` when *x* is ``None`` or does
        not consist solely of digits and surrounding whitespace.
    """
    found = _MINUTES_RE.match(x) if x is not None else None
    if found is None:
        # Show the offending value (not the compiled pattern) to aid debugging.
        print("not a minute count: %r" % (x,))
        return 0
    print("clean done")
    return int(found.group(1))


def main():
    """Fetch the PIDS page and run every row's minutes through clean_minutes."""
    # Third-party dependency; imported here so that clean_minutes() can be
    # imported and used without BeautifulSoup being installed.
    from bs4 import BeautifulSoup

    response = urlopen(PID_URL)
    try:
        html = response.read()
    finally:
        response.close()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(html, 'html.parser')

    table = soup.find("table")
    if table is None:
        print("no table found at %s" % PID_URL)
        return

    for row in table.findAll('tr')[1:]:  # first row is skipped (header)
        col = row.findAll('td')
        if len(col) < 4:
            # Not a full data row; indexing col[3] would raise IndexError.
            continue
        cars = col[1].string
        destination = col[2].string
        # send mins to clean_minutes to error correct and stop BRD error out
        mins = clean_minutes(col[3].string)
        # Filtering/printing left disabled, as in the original script:
        # if destination == "Mt. Vernon Square":
        #     record = (cars, destination, mins)
        #     print "|".join(record)
        # else:
        #     print "-----------"


if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question