2015年10月2日 星期五

How to download content in Python using urllib2, with an example

I am working on a project that needs to download content from the web and parse its data in Python. I have written a few modules that are useful for downloading things, so I am simply sharing the source code here.

= = =
#!/usr/bin/python2
import time
import urllib2

# Value sent as the User-Agent header on every request made by url_req;
# it identifies the client as a desktop Firefox 40 browser.
__user_agent = "Mozilla/5.0 (Windows NT 5.1; rv:40.0) Gecko/20100101 Firefox/40.0"

def url_req(url, cookie=None, max_retry=3, retry_wait_s=5):
    """Download ``url`` and return the response body, retrying on failure.

    Args:
        url: Address to fetch; passed unchanged to ``urllib2.Request``.
        cookie: Optional value for the ``Cookie`` request header. No
            ``Cookie`` header is sent when this is ``None``.
        max_retry: Total number of attempts to make. ``0`` makes no attempt
            at all. A negative value never matches the attempt counter, so
            the function keeps retrying until a download succeeds.
        retry_wait_s: Seconds to sleep between two consecutive attempts.

    Returns:
        The body returned by ``read()`` on the opened response, or ``None``
        when every attempt failed.

    Errors derived from ``Exception`` that occur while opening or reading
    the response are printed and count as a failed attempt.
    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately not caught,
    so the caller can still abort a long retry loop.
    """
    attempt = 0

    while attempt != max_retry:
        request = urllib2.Request(url)
        request.add_header('User-Agent', __user_agent)
        # cookie support
        if cookie is not None:
            request.add_header('Cookie', cookie)

        response = None
        try:
            response = urllib2.urlopen(request)
            return response.read()
        except urllib2.URLError as e:
            # Fall back to the exception itself if it carries no reason.
            print (getattr(e, 'reason', e))
        except Exception as e:
            # Anything else (socket errors, truncated reads, malformed
            # URLs, ...) is reported and treated as a failed attempt.
            print (e)
        finally:
            # Always release the connection, including when read() fails.
            if response is not None:
                response.close()

        attempt += 1
        # Only wait when another attempt will actually follow.
        if attempt != max_retry:
            time.sleep(retry_wait_s)

    return None

沒有留言:

張貼留言