Monday, February 25, 2013

My First Crawler

import urllib2
from pyquery import PyQuery as pq

# Fetch the jQuery "selectors" category page and build a mapping from each
# bookmark link's text to the summary text found at the same position.
url = "http://api.jquery.com/category/selectors/"

# The response returned by urllib2.urlopen() is not a context manager in
# Python 2, so close it explicitly even when reading or decoding fails.
response = urllib2.urlopen(url)
try:
    result = response.read().decode("utf8")
finally:
    response.close()

q = pq(result)

# Text of every <a rel="bookmark"> element on the page.
result_of_bookmarks = q('a[rel="bookmark"]')
bookmarks = result_of_bookmarks.map(lambda i, e: pq(e).text())
# Debug aid -- uncomment to print the collected link texts:
# for item in bookmarks:
#     print item.encode('utf8')

# Text of every <div class="entry-summary"> element on the page.
result_of_sumaries = q('div[class="entry-summary"]')
sumaries = result_of_sumaries.map(lambda i, e: pq(e).text())
# Debug aid -- uncomment to print the collected summaries:
# for item in sumaries:
#     print item.encode('utf8')

# Pair link texts with summaries by position. zip() stops at the shorter
# sequence, so a page with fewer summaries than bookmarks no longer raises
# IndexError; later duplicates of a link text still overwrite earlier ones.
Query = dict(zip(bookmarks, sumaries))

No comments:

Post a Comment