"""Download one year of daily LAX readings from Weather Underground.

For every calendar day of ``year`` the script requests the airport's
``DailyHistory.html`` page, pulls a temperature value and the value next to
the "Average Humidity" label out of the HTML, and writes one
``YYYYMMDD,<temp>,<humidity>`` line to ``<year>_LAXwunder_data.txt`` in the
current working directory. Progress is echoed to stdout.
"""
import calendar
import contextlib
import urllib2

import requests  # not referenced below; kept so the file's import set is unchanged
from bs4 import BeautifulSoup

year = 2004  # change the year here, then run

# Comma-delimited output file. The with-block guarantees it is closed even
# when a download or a parse step raises part-way through the year.
with open(str(year) + '_LAXwunder_data.txt', 'w') as f:
    # Iterate through every month and day of the year.
    for m in range(1, 13):
        # Ask the calendar for the real month length so that Feb 29 is
        # included in leap years (the default year, 2004, is one).
        days_in_month = calendar.monthrange(year, m)[1]

        # A step argument on range() would sample every Nth day instead.
        for d in range(1, days_in_month + 1):
            # Human-readable date, used only for the progress message.
            timestamp = str(year) + '.' + str(m) + '.' + str(d)
            print('Getting data for ' + timestamp)

            # Open the Weather Underground history page for this day.
            url = ('http://www.wunderground.com/history/airport/LAX/'
                   + str(year) + '/' + str(m) + '/' + str(d)
                   + '/DailyHistory.html')

            # closing() releases the HTTP response once the page is parsed.
            with contextlib.closing(urllib2.urlopen(url)) as page:
                soup = BeautifulSoup(page)

            # NOTE(review): assumes the sixth element with class "wx-data"
            # holds the day's temperature -- confirm against the page layout.
            dayTemp = soup.findAll(attrs={'class': 'wx-data'})[5].span.string

            # The humidity value is read from the table cell that follows
            # the cell containing the "Average Humidity" label.
            humidity = soup.find(text='Average Humidity')
            next_cell = humidity.find_parent('td').find_next_sibling('td')
            avg_humidity = next_cell.string

            # Compact YYYYMMDD timestamp with zero-padded month and day.
            timestamp = '%s%02d%02d' % (year, m, d)

            # Write timestamp, temperature and humidity to the file.
            f.write(timestamp + ',' + dayTemp + ',' + avg_humidity + '\n')
            print('%s %s' % (dayTemp, avg_humidity))
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question