"""Download the OSTEP chapter PDFs and append them into one file per book part."""
import contextlib
import re
from pprint import pprint

import requests

# Globals
url = 'http://pages.cs.wisc.edu/~remzi/OSTEP/'

# Seconds to wait on the server; without a timeout a stalled connection
# would block the script indefinitely.
REQUEST_TIMEOUT = 30

# Index row that carries a chapter number: groups are (number, href target).
NUMBERED_ROW = re.compile(r'.+<small>(\d+).+?href=(\S+)')
# Any other index row that carries a link: group is the href target.
LINK_ROW = re.compile(r'.+?href=(\S+)')

# Output files, in the order main() unpacks them.
OUTPUT_NAMES = (
    'intro.pdf',
    'virtualization.pdf',
    'concurrency.pdf',
    'persistence.pdf',
    'appendices.pdf',
)


def _fetch(suffix):
    """Return the response body of ``url + suffix`` as bytes.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so
            an error page is never appended to an output file.
    """
    response = requests.get(url + suffix, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.content


def write_nums(file, start, end, num_to_suffix):
    """Append the chapters numbered ``start`` through ``end`` to ``file``.

    Start & end are inclusive.

    Args:
        file: binary file object opened for writing.
        start: first chapter number to download.
        end: last chapter number to download.
        num_to_suffix: mapping from chapter number to its URL suffix.

    Raises:
        KeyError: if a chapter number in the range is missing from
            ``num_to_suffix``.
    """
    # Generator keeps the lookup lazy: chapters before a missing number are
    # still written before the KeyError surfaces.
    write_urls(file, (num_to_suffix[i] for i in range(start, end + 1)))


def write_urls(file, urls):
    """Download each suffix in ``urls`` (relative to ``url``) and append it to ``file``."""
    for suffix in urls:
        file.write(_fetch(suffix))


def main():
    """Scrape the book index page and build the five per-part output files."""
    index = requests.get(url, timeout=REQUEST_TIMEOUT)
    index.raise_for_status()
    rows = [line for line in index.text.split('\n') if '.pdf' in line]

    num_to_suffix = {}
    firsts = []
    appendices_suffixes = []

    for row in rows:
        numbered = NUMBERED_ROW.match(row)
        if numbered:
            num_to_suffix[int(numbered.group(1))] = numbered.group(2)
            continue

        link = LINK_ROW.match(row)
        if not link:
            continue

        suffix = link.group(1)
        # Unnumbered links are either front matter or appendices.
        if 'preface' in suffix or 'toc' in suffix:
            firsts.append(suffix)
        else:
            appendices_suffixes.append(suffix)

    # NOTE(review): the files are opened in append mode, so re-running the
    # script adds to existing output. Also, raw byte concatenation of several
    # PDFs generally does not produce a properly merged PDF -- confirm the
    # output is usable or switch to a PDF-aware merge.
    with contextlib.ExitStack() as stack:
        # ExitStack closes every file that was opened, even if a later open()
        # or a download raises.
        intro, virtualization, concurrency, persistence, appendices = (
            stack.enter_context(open(name, 'ab+')) for name in OUTPUT_NAMES
        )

        # Intro (has preface & TOC)
        write_urls(intro, firsts)
        write_nums(intro, 1, 2, num_to_suffix)

        # Virtualization
        write_nums(virtualization, 3, 24, num_to_suffix)

        # Concurrency
        write_nums(concurrency, 25, 34, num_to_suffix)

        # Persistence
        write_nums(persistence, 35, 50, num_to_suffix)

        # Appendices
        write_urls(appendices, appendices_suffixes)


if __name__ == '__main__':
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question