"""Scan the front pages of news sites listed in ``sources.txt`` for links
whose URLs contain election/news keywords, recording new matches in
``oldmatches.txt`` and printing a summary after every pass.

Ported from Python 2 (urllib2 / print statements) to Python 3.
"""

import re
import time
import urllib.request

# Compiled once at import time: captures the text between href=" and the
# closing quote.
_HREF_RE = re.compile(r'(?<=href=").*?(?=")')

# A (lower-cased) link URL counts as a match if it contains any of these.
KEYWORDS = ("romney", "obama", "convention", "hurricane")

# File that accumulates every match ever seen, one URL per line.
_HISTORY_FILE = "oldmatches.txt"


def extract_urls_from_html(html):
    """Yield every ``href="..."`` target found in *html* (a text string)."""
    for match in _HREF_RE.finditer(html):
        yield match.group(0)


def extract_urls(page):
    """Fetch *page* and yield every URL referenced by an href attribute.

    The response body is decoded with ``errors="replace"`` so a page with a
    stray non-UTF-8 byte does not abort the whole scan.
    """
    with urllib.request.urlopen(page) as response:
        html = response.read().decode("utf-8", errors="replace")
    yield from extract_urls_from_html(html)


def crawl_web(seed):
    """Crawl outward from *seed*, returning the list of pages visited.

    NOTE(review): retained for parity with the original, which never calls
    it; it is unbounded (no domain restriction) and will not terminate on
    the real web.
    """
    tocrawl = [seed]
    crawled = []
    while tocrawl:
        page = tocrawl.pop()
        if page not in crawled:
            tocrawl.extend(extract_urls(page))
            crawled.append(page)
    return crawled


def load_sources(path="sources.txt"):
    """Return the source-site URLs from *path*, one per line.

    Lines are stripped (the original passed URLs with trailing newlines
    straight to urlopen) and blank lines are skipped.
    """
    with open(path) as sources_text:
        return [line.strip() for line in sources_text if line.strip()]


def find_matches(stories, keywords=KEYWORDS):
    """Return the subset of *stories* whose URL contains any keyword.

    More sophisticated matching analysis will go here.
    """
    return [story for story in stories if any(k in story for k in keywords)]


def main():
    """Run scan passes forever, reporting counts and timing for each pass."""
    source_urls = load_sources()

    while True:
        t0 = time.time()  # start the clock

        # For each media source, gather all current front-page story links.
        all_stories = []
        for source in source_urls:
            all_stories.extend(uri.lower() for uri in extract_urls(source))

        # Keep only the links that mention a keyword.
        matching_stories = find_matches(all_stories)

        # Filter out stories already recorded.  The substring test against
        # the raw history text mirrors the original behaviour (it can
        # false-positive when one URL is a prefix of another).
        with open(_HISTORY_FILE) as f:
            old_matches = f.read()
        new_matching_stories = [s for s in matching_stories
                                if s not in old_matches]

        # Append the genuinely new matches to the history file.
        with open(_HISTORY_FILE, "a") as mod_matches:
            for match in new_matching_stories:
                mod_matches.write(match + "\n")
                print(match)

        # Output: report results for this pass.
        elapsed = time.time() - t0
        print(f"{len(all_stories)} links scanned in {elapsed} seconds")
        print(f"{len(matching_stories)} matches found")
        print(f"{len(new_matching_stories)} new matches found\n\n")


if __name__ == "__main__":
    main()
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question