BeautifulSoup: Scraping Wikipedia

From Glitchdata
Jump to navigation Jump to search
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup

article= "Albert Einstein"
article = urllib.quote(article)

opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')] #wikipedia needs this

resource = opener.open("http://en.wikipedia.org/wiki/" + article)
data = resource.read()
resource.close()
soup = BeautifulSoup(data)
print soup.find('div',id="bodyContent").p


Links