1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# coding: utf-8
"""Compare how fast re, lxml, BeautifulSoup and PyQuery extract a page title.

The page is downloaded once, before any timing starts, so the reported
figures reflect parsing cost rather than network latency.
"""
import re
import sys  # not used below; retained from the original import list
import time

import requests
from bs4 import BeautifulSoup as bs4
from lxml import etree
from pyquery import PyQuery as pq

URL = "http://cuiqingcai.com/2621.html"
REPEAT = 1000

# Compiled once here instead of on every loop iteration.
TITLE_RE = re.compile("<title>(.*?)</title>", re.S)


def title_with_re(html):
    """Return the text between <title> and </title> in *html* (a str).

    Raises AttributeError when the pattern does not match, because
    ``search`` then returns None.
    """
    return TITLE_RE.search(html).group(1)


def title_with_lxml(html):
    """Return the first ``//title/text()`` node of *html* parsed by lxml.

    Raises IndexError when the XPath query yields no nodes.
    """
    return etree.HTML(html).xpath("//title/text()")[0]


def title_with_bs4(html):
    """Return the title text of *html* using BeautifulSoup on the lxml parser."""
    return bs4(html, 'lxml').title.get_text()


def title_with_pyquery(html):
    """Return the title text of *html* using PyQuery."""
    return pq(html)('title').text()


def time_parser(parse, html, repeat=REPEAT):
    """Call ``parse(html)`` *repeat* times and return the total seconds spent.

    Each extracted title is printed together with its iteration index.
    Only the ``parse`` call itself is timed; the print happens after the
    clock has been read, so console I/O does not distort the comparison.
    """
    elapsed = 0.0
    for i in range(repeat):
        start = time.perf_counter()
        title = parse(html)
        elapsed += time.perf_counter() - start
        print(title, i)
    return elapsed


def main():
    """Fetch the page once, benchmark every parser on it, print the totals."""
    # One request for the whole run: no per-iteration network noise, and no
    # need to pause between benchmarks to pace repeated requests.
    response = requests.get(URL, timeout=30)
    response.raise_for_status()
    html_text = response.text      # decoded str, used by the regex benchmark
    html_bytes = response.content  # raw bytes, used by the HTML parsers

    t3 = time_parser(title_with_re, html_text)
    t6 = time_parser(title_with_lxml, html_bytes)
    t9 = time_parser(title_with_bs4, html_bytes)
    t12 = time_parser(title_with_pyquery, html_bytes)

    print("re :" + str(t3), "lxml :" + str(t6), "bs4 lxml:" + str(t9), "PyQuery" + str(t12))


if __name__ == "__main__":
    main()
