Common Retry Approaches in Python Crawlers: the retry Module, the retrying Module, and Requests' Built-in Retries

The plain approach:

# Approach 1: retry by hand with recursion
import requests

def do_some(url, n=1):
    print(n, url)
    if n > 2:
        print('retried too many times, giving up')
        return None
    try:
        r = requests.get(url, timeout=2)
        return r.text
    except Exception as e:
        print(e.args)
        n += 1
        return do_some(url, n)
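
The recursive version works, but every failure adds a stack frame. A minimal loop-based sketch of the same idea (the function name fetch_with_loop and the attempt count are illustrative, not from the original):

import requests

def fetch_with_loop(url, max_tries=3):
    # try up to max_tries times, then give up
    for attempt in range(1, max_tries + 1):
        try:
            r = requests.get(url, timeout=2)
            return r.text
        except Exception as e:
            print('attempt', attempt, 'failed:', e.args)
    print('retried too many times, giving up')
    return None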

The retry module

# coding:utf-8
from requests.exceptions import ConnectTimeout
import requests
from retry import retry
__author__ = 'songhao'

# Approach 2: the retry decorator
# retry up to 3 times whenever a ConnectTimeout is raised
@retry(exceptions=ConnectTimeout, tries=3)
def do_other(ourl):
    print('requesting', ourl)
    r = requests.get(ourl, timeout=1)
    return r.text

if __name__ == '__main__':
    ourl = "https://www.google.com"
    do_some(ourl)  # Approach 1, defined above
    try:
        do_other(ourl)
    except Exception as e:
        print('do_other failed after 3 tries:', e)
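
The retry decorator also takes delay and backoff parameters to space the attempts out. A hedged sketch (the parameter values and function name are illustrative):

from requests.exceptions import ConnectTimeout, ReadTimeout
from retry import retry
import requests

# wait 1s after the first failure, then 2s, then 4s (delay doubles via backoff)
@retry(exceptions=(ConnectTimeout, ReadTimeout), tries=4, delay=1, backoff=2)
def do_other_backoff(ourl):
    r = requests.get(ourl, timeout=1)
    return r.text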

The retrying module

import requests
from retrying import retry

# give up after 2 attempts; the last attempt's exception propagates to the caller
@retry(stop_max_attempt_number=2)
def get_html(url):
    print(url)
    r = requests.get(url, timeout=2)
    return r.status_code

if __name__ == "__main__":
    try:
        a = get_html("https://www.baidu.com")
    except Exception as e:
        print('request failed:', e)
        a = 0
    print(a)
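
retrying can also wait between attempts and limit which exceptions trigger a retry. A minimal sketch (the function names is_timeout and get_html_patient are illustrative):

import requests
from retrying import retry

def is_timeout(exception):
    # only retry on request timeouts, let other errors propagate immediately
    return isinstance(exception, requests.exceptions.Timeout)

# up to 3 attempts, waiting 2000 ms between them
@retry(stop_max_attempt_number=3, wait_fixed=2000,
       retry_on_exception=is_timeout)
def get_html_patient(url):
    r = requests.get(url, timeout=2)
    return r.status_code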

Retrying failed requests with Requests itself

from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from requests import Session, exceptions

s = Session()
# retry up to 5 times, including on HTTP 500 responses
s.mount('https://', HTTPAdapter(
    max_retries=Retry(total=5, status_forcelist=[500])
))

# once the retries are exhausted, requests surfaces the failure as a RetryError
try:
    s.get('https://httpbin.org/status/500')
except exceptions.RetryError as e:
    print('gave up after retries:', e)
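
Retry also supports exponential backoff between attempts via backoff_factor; a sketch, assuming a reasonably recent urllib3 (the status list and factor are illustrative):

from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from requests import Session

s = Session()
# sleep an exponentially growing interval (scaled by backoff_factor)
# between attempts for the listed status codes
retries = Retry(total=5, backoff_factor=0.5,
                status_forcelist=[500, 502, 503, 504])
s.mount('https://', HTTPAdapter(max_retries=retries))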