BeautifulSoup: Scraping Wikipedia

From Glitchdata
Revision as of 05:57, 28 July 2014 by Admin (talk | contribs) (Created page with "<pre> import urllib import urllib2 from BeautifulSoup import BeautifulSoup article= "Albert Einstein" article = urllib.quote(article) opener = urllib2.build_opener() opener....")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup

# Fetch a Wikipedia article and print the first <p> element found inside
# the page's <div id="bodyContent">.

# Article title, percent-encoded so it is safe to append to the URL path.
article = urllib.quote("Albert Einstein")
url = "http://en.wikipedia.org/wiki/" + article

opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]  # wikipedia needs this

resource = opener.open(url)
try:
    data = resource.read()
finally:
    # Close the response even when reading it fails, so the connection
    # is not leaked.
    resource.close()

soup = BeautifulSoup(data)
body = soup.find('div', id="bodyContent")
if body is None:
    # find() returns None when nothing matches; stop with a readable message
    # instead of an AttributeError on the `.p` lookup below.
    raise SystemExit('No div with id "bodyContent" found at ' + url)

# Single-argument print: same output as the Python 2 print statement.
print(body.p)


Links