这是一个使用 Python 的 urllib 直接登录网站,并处理网站 session 和 cookie 的示例。
# Log in to a website with urllib and keep the session alive through cookies.
#
# The listing was written for Python 2 (cookielib / urllib / urllib2). The
# import fallback below binds the same names to their Python 3 equivalents so
# the rest of the script is identical on either interpreter.
from contextlib import closing

try:
    import cookielib
    import urllib
    import urllib2
    urlencode = urllib.urlencode
except ImportError:
    import http.cookiejar as cookielib
    import urllib.parse
    import urllib.request as urllib2
    urlencode = urllib.parse.urlencode

login = 'ismellbacon123@yahoo.com'
password = 'login'

# Enable cookie support for urllib2: every response handled by this opener
# stores its cookies in `cookiejar`, and every request it sends replays them.
cookiejar = cookielib.CookieJar()
urlOpener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookiejar))

# Send login/password to the site and get the session cookie
values = {'login': login, 'password': password}
data = urlencode(values)
if not isinstance(data, bytes):
    # On Python 3 urlencode() returns text, but a POST body must be bytes.
    # The encoded form is percent-escaped, so ASCII is always sufficient.
    # (On Python 2 `bytes` is `str`, so this branch is skipped.)
    data = data.encode('ascii')

# NOTE(review): the credentials are POSTed to a plain http:// URL.
request = urllib2.Request("http://www.imdb.com/register/login", data)
with closing(urlOpener.open(request)) as url:
    # Our cookiejar automatically receives the cookies
    page = url.read(500000)

# Make sure we are logged in by checking the presence of the cookie "id".
# (which is the cookie containing the session identifier.)
if not any(cookie.name == 'id' for cookie in cookiejar):
    # NOTE(review): the message includes the password; kept as in the original.
    raise ValueError("Login failed with login=%s, password=%s" % (login, password))

print("We are logged in !")

# Make another request with our session cookie
# (Our urlOpener automatically uses cookies from our cookiejar)
with closing(urlOpener.open('http://imdb.com/find?s=all&q=grave')) as url:
    page = url.read(200000)
