Urllib库使用

请求网页

1
2
3
4
import urllib2

response = urllib2.urlopen("http://www.baidu.com")
print response.read()
1
python demo.py

分析返回

1
2
3
response = urllib2.urlopen("http://www.baidu.com")
urlopen(url, data, timeout)
print response.read()

构造Request

1
2
3
4
5
import urllib2

request = urllib2.Request("http://www.baidu.com")
response = urllib2.urlopen(request)
print response.read()

POST 方式

1
2
3
4
5
6
7
8
9
import urllib
import urllib2

values = {"username":"310869927@qq.com","password":"XXXX"}
data = urllib.urlencode(values)
url = "https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn"
request = urllib2.Request(url,data)
response = urllib2.urlopen(request)
print response.read()

GET 方式

1
2
3
4
5
6
7
8
9
10
11
import urllib
import urllib2

values = {}
values['username'] = "310869927@qq.com"
values['password'] = "XXXX"
data = urllib.urlencode(values)
url = "http://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn"
request = urllib2.Request(url,data)
response = urllib2.urlopen(request)
print response.read()

设置 Headers

1
2
3
4
5
6
7
8
9
10
11
import urllib  
import urllib2

url = 'http://www.server.com/login'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
form_fields = {'username' : 'jhayes', 'password' : 'XXXX' }
# The third argument of Request is a dict of extra HTTP headers; here it
# sends a browser-style User-Agent string with the request.
headers = { 'User-Agent' : user_agent }
request = urllib2.Request(url, urllib.urlencode(form_fields), headers)
page = urllib2.urlopen(request).read()
1
2
3
4
5
6
7
8
9
10
11
import urllib  
import urllib2

url = 'http://www.server.com/login'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
form_fields = {'username' : 'jhayes', 'password' : 'XXXX' }
# The third argument of Request is a dict of extra HTTP headers; here it
# sends a browser-style User-Agent string with the request.
headers = { 'User-Agent' : user_agent }
request = urllib2.Request(url, urllib.urlencode(form_fields), headers)
page = urllib2.urlopen(request).read()
1
2
# Header dict carrying a Referer entry in addition to the User-Agent.
headers = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)',
    'Referer': 'http://www.zhihu.com/articles',
}

Proxy(代理)的设置

1
2
3
4
5
6
7
8
9
import urllib2

# Toggle that selects whether requests go through the proxy.
enable_proxy = True
proxy_handler = urllib2.ProxyHandler({"http" : 'http://some-proxy.com:8080'})
# An empty mapping tells ProxyHandler to use no proxy at all.
null_proxy_handler = urllib2.ProxyHandler({})
if enable_proxy:
    opener = urllib2.build_opener(proxy_handler)
else:
    opener = urllib2.build_opener(null_proxy_handler)
# Install the opener globally so later urllib2.urlopen calls use it.
urllib2.install_opener(opener)

Timeout 设置

1
2
3
4
5
import urllib2

# Without a request body, timeout has to be passed by keyword, because the
# second positional parameter of urlopen is data.
response = urllib2.urlopen('http://www.baidu.com', timeout=10)

# When a body is passed as the second argument, timeout can simply follow it
# positionally. `data` is a placeholder here: None sends no body; replace it
# with a urlencoded string to send one.
data = None
response = urllib2.urlopen('http://www.baidu.com', data, 10)

PUT / DELETE

1
2
3
4
import urllib2

# NOTE: `uri` and `data` are not defined in this snippet; they are assumed to
# be provided by the surrounding code.
# The HTTP verb is forced by overriding get_method on this Request instance.
put_request = urllib2.Request(uri, data=data)
put_request.get_method = lambda: 'PUT' # or 'DELETE'
response = urllib2.urlopen(put_request)

Urllib库使用
http://jhayes.cn/blog/104473986.html
作者
JHAYES
发布于
2017年5月2日
许可协议