菜鸟博客
菜鸟站长

【Python爬虫学习笔记】requests库的简单示例

注:以下为崔庆才的《52讲轻松搞定网络爬虫》拉勾课程的笔记

发送GET请求,无参数

import requests

# Hit httpbin's /get echo endpoint and dump the response body as text.
response = requests.get('http://httpbin.org/get')
print(response.text)

发送GET请求,有参数

# Query-string parameters go in `params`; requests URL-encodes them
# automatically (the request becomes /get?name=germey&age=25).
payload = {
    'name': 'germey',
    'age': 25,
}
response = requests.get('http://httpbin.org/get', params=payload)
print(response.text)

抓取二进制数据

import requests

# The favicon is binary data: `.text` tries to decode it (mojibake),
# while `.content` yields the raw bytes.
response = requests.get('https://github.com/favicon.ico')
print(response.text)
print(response.content)

# Save the raw bytes to disk; 'wb' skips any text decoding.
with open('favicon.ico', 'wb') as icon_file:
    icon_file.write(response.content)

添加 headers

import requests

# Send a desktop-browser User-Agent; some sites block the default
# python-requests UA string.
ua_headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
}
response = requests.get('https://static1.scrape.cuiqingcai.com/', headers=ua_headers)
print(response.text)

发送POST请求

import requests

# A dict passed as `data` is sent form-encoded; httpbin echoes it back
# under the "form" key.
form = {'name': 'germey', 'age': '25'}
response = requests.post("http://httpbin.org/post", data=form)
print(response.text)

打印响应数据

import requests

# Inspect the typed attributes exposed by the Response object.
response = requests.get('https://www.bilibili.com/')
for attr in ('status_code', 'headers', 'cookies', 'url', 'history'):
    value = getattr(response, attr)
    print(type(value), value)

文件上传

import requests

# FIX: the original opened the file without ever closing it, leaking the
# handle. `with` guarantees the file is closed even if the upload raises.
with open('favicon.ico', 'rb') as icon:
    files = {'file': icon}
    r = requests.post('http://httpbin.org/post', files=files)
print(r.text)

获取Cookies

import requests

# Cookies set by the server are collected in a RequestsCookieJar.
response = requests.get('http://www.baidu.com')
print(response.cookies)
# The jar supports dict-style iteration over (name, value) pairs.
for name, value in response.cookies.items():
    print(f'{name}={value}')

设置 Cookies 的信息

import requests

# Paste the cookie header copied from your own GitHub session here.
cookies = '自己从github获取到自己的cookies,填写到此处'
jar = requests.cookies.RequestsCookieJar()
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
}
# Split the "k1=v1; k2=v2" cookie string into pairs and load each one
# into the jar; split('=', 1) keeps '=' characters inside the value intact.
for pair in cookies.split(';'):
    name, value = pair.split('=', 1)
    jar.set(name, value)
response = requests.get('https://github.com/', cookies=jar, headers=headers)
print(response.text)

Session 维持,模拟同一个 Session 而不用担心 Cookies 的问题

import requests

s = requests.Session()
s.get('http://httpbin.org/cookies/set/number/123456789')
r = s.get('http://httpbin.org/cookies')
print(r.text)

SSL 证书验证

import requests
from requests.packages import urllib3
import logging

# Silence the InsecureRequestWarning raised for unverified HTTPS requests.
urllib3.disable_warnings()
# Alternatively, capture warnings into the logging system.
logging.captureWarnings(True)
# FIX: the original URL had a duplicated scheme ('https://https://...'),
# which makes requests fail to resolve the host.
response = requests.get('https://www.bilibili.com/', verify=False)
print(response.status_code)

# Supply a client certificate as a (cert, key) tuple; a single combined
# file also works. The private key must be stored unencrypted.
response = requests.get('https://static2.scrape.cuiqingcai.com/', cert=('/path/server.crt', '/path/server.key'))
print(response.status_code)

超时设置

import requests

# A (connect, read) tuple sets the two phase timeouts independently.
r = requests.get('https://httpbin.org/get', timeout=(5, 30))
# FIX: a single number is applied to BOTH the connect and the read phase
# individually — it is NOT a combined 1-second total as the original note
# claimed (see the requests "Timeouts" documentation).
r = requests.get('https://httpbin.org/get', timeout=1)
# timeout=None waits forever for the server to respond ...
r = requests.get('https://httpbin.org/get', timeout=None)
# ... and is also the default when timeout is omitted entirely.
r = requests.get('https://httpbin.org/get')
print(r.status_code)

身份认证之 HTTPBasicAuth

import requests
from requests.auth import HTTPBasicAuth

# Explicit HTTPBasicAuth object ...
response = requests.get('https://static3.scrape.cuiqingcai.com/', auth=HTTPBasicAuth('admin', 'admin'))
# ... or the shorthand: a (user, password) tuple is converted to
# HTTPBasicAuth internally.
response = requests.get('https://static3.scrape.cuiqingcai.com/', auth=('admin', 'admin'))
print(response.status_code)

身份认证之 OAuth

import requests
from requests_oauthlib import OAuth1

# Requires the third-party requests_oauthlib package.
verify_url = 'https://api.twitter.com/1.1/account/verify_credentials.json'
oauth = OAuth1(
    'YOUR_APP_KEY',
    'YOUR_APP_SECRET',
    'USER_OAUTH_TOKEN',
    'USER_OAUTH_TOKEN_SECRET',
)
requests.get(verify_url, auth=oauth)

代理设置

import requests

# Route both HTTP and HTTPS traffic through an HTTP proxy.
proxies = {
    'http': 'http://10.10.10.10:1080',
    'https': 'http://10.10.10.10:1080',
}
requests.get('https://httpbin.org/get', proxies=proxies)

# Proxy credentials are embedded in the URL as user:password@host:port.
proxies = {'https': 'http://user:[email protected]:1080/'}
requests.get('https://httpbin.org/get', proxies=proxies)

# SOCKS5 proxies require the extra socks dependency to be installed.
proxies = {
    'http': 'socks5://user:[email protected]:port',
    'https': 'socks5://user:[email protected]:port',
}
requests.get('https://httpbin.org/get', proxies=proxies)

构造 Prepared Request 对象

from requests import Request, Session

# Build the request by hand, then let the Session prepare and send it.
# Useful when the PreparedRequest must be inspected or tweaked before
# it goes on the wire.
target = 'http://httpbin.org/post'
form = {'name': 'germey'}
ua_headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36'
}
session = Session()
request = Request('POST', target, data=form, headers=ua_headers)
prepared = session.prepare_request(request)
response = session.send(prepared)
print(response.text)
赞(0)
未经允许不得转载:不吃香菜 » 【Python爬虫学习笔记】requests库的简单示例

评论 抢沙发

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址