"""Report which DefenseCenter tag pages lack the robots "noindex,follow" meta tag.

Each URL in ``findMetaUrls`` is fetched (following redirects) and its HTML is
parsed.  A URL is added to ``withOutMeta`` when the page contains no
``<meta name="robots" content="noindex,follow">`` element.  Any error raised
while fetching or parsing one URL is printed and that URL is skipped; the
final list is printed at the end.
"""
from __future__ import print_function

from bs4 import BeautifulSoup
import requests

findMetaUrls = [
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=DDoS",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Financial%20Services",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Government",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=File%20Security",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Advanced%20Targeted%20Attacks",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Web%20Attacks",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Compliance",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Web%20Application%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Fraud",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Web%20Attacks",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Fraud",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Web%20Application%20Security",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=File%20Security",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Compliance",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Web%20Attacks",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Services",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=SharePoint%20Security",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=DDoS",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=DDoS",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Database%20Security",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Database%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Database%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Compliance",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Web%20Application%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Services",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Services",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Cloud",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Cloud",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Web%20Attacks",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Advanced%20Targeted%20Attacks",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Advanced%20Targeted%20Attacks",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Fraud",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Financial%20Services",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Government",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=File%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Cloud",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Services",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Cloud",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Cloud",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=File%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Services",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Financial%20Services",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Fraud",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Fraud",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Advanced%20Targeted%20Attacks",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Government",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Database%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=SharePoint%20Security",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=DDoS",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Compliance",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=SharePoint%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=File%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Database%20Security",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Fraud",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Web%20Application%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Services",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Web%20Attacks",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=Government",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=SharePoint%20Security",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Web%20Attacks",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Advanced%20Targeted%20Attacks",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=File%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Database%20Security",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Cloud",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Web%20Application%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=DDoS",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Compliance",
    "http://colo-www-wd3:8002/DefenseCenter/Waar?tag=SharePoint%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Compliance",
    "http://colo-www-wd3:8002/DefenseCenter/ThreatAdvisories?tag=Advanced%20Targeted%20Attacks",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=SharePoint%20Security",
    "https://colo-www-wd3:8002/DefenseCenter/Waar?tag=Financial%20Services",
    "https://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=DDoS",
    "http://colo-www-wd3:8002/DefenseCenter/HackerIntelligenceReports?tag=Web%20Application%20Security",
]

# Seconds to wait for a server before giving up on a URL.  requests has no
# default timeout, so without this a single unresponsive host would stall
# the whole run.
REQUEST_TIMEOUT_SECONDS = 30

# URLs whose page does NOT carry <meta name="robots" content="noindex,follow">.
withOutMeta = []

for url in findMetaUrls:
    try:
        response = requests.get(
            url,
            allow_redirects=True,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        soup = BeautifulSoup(response.text, 'lxml')
        tags = soup.find_all(
            "meta", {"name": "robots", "content": "noindex,follow"}
        )
        # find_all() returns only matching elements and never yields None,
        # so "the tag is missing" is signalled by an empty result set.
        if not tags:
            withOutMeta.append(url)
    except Exception as detail:
        # Best-effort crawl: report the failure and carry on with the
        # remaining URLs instead of aborting the whole run.
        print("Err ", detail)

print(withOutMeta)
Run
Reset
Share
Import
Link
Embed
Language▼
English
中文
Python Fiddle
Python Cloud IDE
Follow @python_fiddle
Browser Version Not Supported
Due to Python Fiddle's reliance on advanced JavaScript techniques, older browsers might have problems running it correctly. Please download the latest version of your favourite browser.
Chrome 10+
Firefox 4+
Safari 5+
IE 10+
Let me try anyway!
url:
Go
Python Snippet
Stackoverflow Question