## returns the next url on 'page' and the end position of that url,
## so that the search for the next url can start from there
def get_next_target(page):
    start_link = page.find('<a href=')
    if start_link == -1:
        return None, 0
    start_quote = page.find('"', start_link)
    end_quote = page.find('"', start_quote + 1)
    url = page[start_quote + 1:end_quote]
    return url, end_quote

## compiles a list of all links on 'page'
def get_all_links(page):
    links = []
    while page:
        url, endpos = get_next_target(page)
        if url:
            links.append(url)
            page = page[endpos:]
        else:
            break
    return links

## removes every occurrence of element x from list p and returns the new list
def remove_element(x, p):
    if x not in p:
        return p
    tempList = []
    for i in p:
        if i != x:
            tempList.append(i)
    return tempList

## starts from the seed page and compiles 2 lists:
#   toCrawl = links left to crawl
#   crawled = list of crawled links
def crawl_web(seed):
    toCrawl = [seed]
    crawled = []
    while toCrawl:
        x = toCrawl[0]
        toCrawl = remove_element(x, toCrawl)
        if x not in crawled:
            # links found on x still need to be crawled; x itself is done
            toCrawl = toCrawl + get_all_links(x)
            crawled.append(x)
    return crawled

# should return a list of all links in the input string:
# print(get_all_links('Here is a <a href="http://udacity.com">link'))
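
## a minimal usage sketch, not part of the original exercise: the test_page
## string below is an assumed example. Because this snippet has no
## page-fetching helper, crawl_web works directly on page text, and the
## extracted urls contain no further '<a href=' tags, so the crawl ends there.
test_page = ('Two links: <a href="http://a.example.com">A</a> '
             'and <a href="http://b.example.com">B</a>')
print(get_all_links(test_page))
# expected: ['http://a.example.com', 'http://b.example.com']
print(crawl_web(test_page))
# expected: the seed text followed by the two extracted urls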