# Created by Sefi Piscon
"""Download the LME non-ferrous table and export it as CSV and XML.

Running this file as a script fetches ``URL``, reads the "Data valid for ..."
date and the table inside the ``table-wrapper`` div, and writes these files
to the current directory:

* ``source.csv`` - one comma-separated line per table row
* ``source.xml`` - the same rows as ``<LME>`` elements
* ``log.txt``    - the date string found on the page and the HTTP status code
* ``data.html``  - created (truncated) but never written to
"""
import datetime
from xml.sax.saxutils import escape

URL = 'https://www.lme.com/en-gb/metals/non-ferrous/#tabIndex=0'

# Full English month name -> three-letter abbreviation understood by the
# ``%b`` directive of ``strptime``. Kept as an ordered tuple so that the
# first match (January first, December last) wins.
_MONTH_ABBREVIATIONS = (
    ("January", "Jan"),
    ("February", "Feb"),
    ("March", "Mar"),
    ("April", "Apr"),
    ("May", "May"),
    ("June", "Jun"),
    ("July", "Jul"),
    ("August", "Aug"),
    ("September", "Sep"),
    ("October", "Oct"),
    ("November", "Nov"),
    ("December", "Dec"),
)

# XML element names for table columns 1..8, in column order.
_COLUMN_TAGS = (
    "aluminiumAlloy",
    "aluminium",
    "copper",
    "lead",
    "nickel",
    "tin",
    "zinc",
    "nasaac",
)


def translate_date_names(mystr):
    """Return *mystr* with its full month name replaced by the abbreviation.

    The argument is converted with ``str()`` first. Only the first month name
    found (searching January through December) is abbreviated; every
    occurrence of that one name is replaced. Text without a month name is
    returned unchanged.
    """
    mystr = str(mystr)
    for full_name, short_name in _MONTH_ABBREVIATIONS:
        if full_name in mystr:
            # The original April branch replaced "February" by mistake, so
            # April dates were never abbreviated and later failed to parse.
            return mystr.replace(full_name, short_name)
    return mystr


def main():
    """Fetch the page, then write the log, CSV and XML output files.

    Raises:
        AttributeError: if the page has no ``delayed-date`` or
            ``table-wrapper`` div.
        ValueError: if the date text does not match ``'%d %b %Y'`` after
            month-name translation.
        IndexError: if a table row has fewer than nine ``td`` cells.
    """
    # Imported here rather than at the top of the file so that
    # translate_date_names() can be imported without the scraping packages.
    import requests
    from bs4 import BeautifulSoup

    response = requests.get(URL)
    soup = BeautifulSoup(response.text, 'html.parser')
    metal_table = soup.find('div', class_='table-wrapper')
    date_value = str(soup.find('div', class_='delayed-date').text.strip())
    print("Date from site: " + date_value)

    status_code = str(response.status_code)

    # data.html is opened only to create/truncate it, as the original script
    # did; nothing is ever written to it.
    with open('source.csv', 'w') as csv_file, \
            open('log.txt', 'w') as log_file, \
            open('data.html', 'w'):
        log_file.write("Last file date: " + date_value + "\n")
        log_file.write("Status code = " + status_code + '\n')
        if status_code != "200":
            log_file.write("Please check url: " + URL)

        # Change date format: %d - 2 digit date, %b - 3-letter month,
        # %Y - 4 digit year, %m - 2 digit month
        date_value = date_value.replace('Data valid for ', '')
        short_date = translate_date_names(date_value)
        print("Date after translation: " + short_date)
        date_value = datetime.datetime.strptime(
            short_date, '%d %b %Y').strftime('%d/%m/%Y')

        # Opened only after the date parsed successfully, so a bad date does
        # not truncate an existing source.xml. The explicit encoding matches
        # the encoding declared in the XML header written below.
        with open('source.xml', 'w', encoding='utf-8') as xml_file:
            xml_file.write('<?xml version="1.0" encoding="utf-8"?><LMEs>\n')
            print("Date format for source file: " + date_value)

            for body in metal_table.find_all('tbody'):
                for row in body.find_all('tr'):
                    # Look the cells up once instead of once per column.
                    cells = row.find_all('td')
                    description = str(cells[0].text.strip()).lower()
                    description = description.replace("&", "")
                    description = description.replace(" ", "")
                    values = [cells[i].text.strip() for i in range(1, 9)]

                    xml_file.write('\t<LME>\n')
                    xml_file.write('\t\t<description>' + escape(description)
                                   + '</description>\n')
                    xml_file.write('\t\t<value_date>' + date_value
                                   + '</value_date>\n')
                    for tag, value in zip(_COLUMN_TAGS, values):
                        # escape() keeps the XML well-formed if a cell ever
                        # contains '&', '<' or '>'.
                        xml_file.write('\t\t<' + tag + '>' + escape(value)
                                       + '</' + tag + '>\n')

                    # NOTE(review): fields are joined without quoting, so a
                    # cell containing a comma would shift the CSV columns.
                    line = ",".join([description, date_value] + values)
                    print(line)
                    csv_file.write(line + "\n")
                    xml_file.write('\t</LME>\n')

            xml_file.write('</LMEs>\n')


if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question