# -*- coding: utf-8 -*-
# Written by Luke Davis under the MIT License (MIT) for select parties
# To be used solely for educational purposes
#
# Polls a Shopify store and logs the variant IDs of every product.
#
# The script asks for a store host name, reads /sitemap_products_1.xml to
# collect (title, product URL) pairs, then loops forever fetching
# "<product URL>.xml" and printing the variant IDs found there to both the
# console and the log file.
#
# Written for Python 2 (it relies on raw_input).
from __future__ import print_function

import time

import requests
from lxml.html import fromstring

# Log file location, change r"z:\shopify_output.json" to your location.
# Raw string so the backslash in the Windows path is never read as an escape.
logFileLocation = r"z:\shopify_output.json"

# Seconds to wait for any single HTTP response before giving up on it;
# without a timeout a stalled connection would hang the script forever.
requestTimeout = 30

# Seconds to pause after a failed product request, so an unreachable store
# is not hit again immediately.
retryDelay = 5

# The context manager guarantees the log is closed (and its buffer written)
# however the script ends, including Ctrl+C out of the polling loop.
with open(logFileLocation, "w") as log:
    print('Written solely by Luke Davis (@R8T3D)')
    url = 'http://' + raw_input("Enter Shopify website URL: ") + '/sitemap_products_1.xml'
    print('Scraping! Check log file @ ' + logFileLocation + ' to see output.')
    print("!!! Also make sure to clear file every hour or so !!!")

    page = requests.get(url, timeout=requestTimeout)
    tree = fromstring(page.content)
    url_tags = tree.xpath("//url[image]")

    # Collect (product title, product URL) pairs. Entries missing either
    # piece are skipped instead of raising IndexError on the [0] lookup.
    data = []
    for e in url_tags:
        titles = e.xpath("./image/title//text()")
        locs = e.xpath("./loc/text()")
        if titles and locs:
            data.append((titles[0], locs[0]))

    # With nothing to poll, the loop below would spin forever doing no work.
    if not data:
        raise SystemExit("No products found in " + url)

    while True:
        # product_url is deliberately distinct from the sitemap `url` above.
        for prod, product_url in data:
            try:
                page = requests.get(product_url + ".xml", timeout=requestTimeout)
            except requests.exceptions.RequestException as err:
                # A transient network failure should not end the monitor.
                print("Request failed for " + product_url + ":", err)
                time.sleep(retryDelay)
                continue

            tree = fromstring(page.content)
            variants = tree.xpath("//variants[@type='array']//id[@type='integer']//text()")

            print("Product URL: " + product_url)
            print(prod.encode("utf-8"), variants)
            print('')

            print("Product URL: " + product_url, file=log)
            print(prod.encode("utf-8"), variants, file=log)
            print('', file=log)

            # Push the entry to disk now so the log can be read while the
            # script is still running.
            log.flush()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question