Selenium 的简单使用
Selenium 文档地址:https://selenium-python.readthedocs.io/installation.html
声明
# Create a WebDriver session; one constructor exists per supported browser.
from selenium import webdriver
# Each line below shows the constructor for a different browser. Every
# assignment rebinds `browser`, so running all four as-is starts four
# sessions and keeps a reference only to the last one (Safari).
browser = webdriver.Chrome()
browser = webdriver.Firefox()
browser = webdriver.Edge()
browser = webdriver.Safari()
访问页面
# Open a page in Chrome and print its HTML source.
from selenium import webdriver

browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    # page_source holds the HTML of the currently loaded page.
    print(browser.page_source)
finally:
    # quit() ends the whole driver session (close() only closes the current
    # window), and the finally clause runs it even if loading the page fails.
    browser.quit()
查找单个节点
# Locate the same element three ways: by ID, by CSS selector and by XPath.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    # A page has to be loaded before anything can be located; the other
    # snippets in this file look up the element with id "q" on Taobao.
    browser.get('https://www.taobao.com')
    # find_element(By.<strategy>, value) replaces the find_element_by_*
    # helpers, which were removed in Selenium 4.3.
    input_first = browser.find_element(By.ID, 'q')
    input_second = browser.find_element(By.CSS_SELECTOR, '#q')
    input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
    print(input_first, input_second, input_third)
finally:
    # End the driver session even when a lookup raises.
    browser.quit()
查找多个节点
# Locate every element matching a CSS selector; the result is a list.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    browser.get('https://www.baidu.com')
    # Select all <li> elements under the element with class "service-bd".
    # find_elements(By.<strategy>, value) replaces the find_elements_by_*
    # helpers, which were removed in Selenium 4.3.
    # NOTE(review): ".service-bd" looks like a Taobao selector; if this page
    # has no such element the list is simply empty - confirm the intended URL.
    lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print(lis)
finally:
    # End the driver session even when loading or locating raises.
    browser.quit()
节点交互
在淘宝中搜索iPhone
# Type "iPhone" into the Taobao search box and click the search button.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
# Locate the search box. Named search_box (not `input`) so the builtin
# input() is not shadowed; find_element(By...) replaces the
# find_element_by_* helpers removed in Selenium 4.3.
search_box = browser.find_element(By.ID, 'q')
# Type the query into the search box.
search_box.send_keys('iPhone')
# Pause for one second before looking up the button.
time.sleep(1)
button = browser.find_element(By.CLASS_NAME, 'btn-search')
# Click the search button. The session is not closed afterwards, so the
# browser window stays open on the result page.
button.click()
动作链
实现一个节点的拖拽操作,将某个节点从一处拖拽到另外一处
# Drag the #draggable element onto the #droppable element with ActionChains.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
# Switch into the frame named "iframeResult" before locating the elements.
browser.switch_to.frame('iframeResult')
# find_element(By...) replaces the find_element_by_* helpers removed in
# Selenium 4.3.
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')
actions = ActionChains(browser)
# Actions are only queued here; perform() executes them.
actions.drag_and_drop(source, target)
actions.perform()
执行 JavaScript
下拉进度条,下拉到底部
# Run JavaScript in the page: scroll to the bottom, then show an alert.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
# Scripts are executed in the listed order.
scripts = (
    'window.scrollTo(0, document.body.scrollHeight)',
    'alert("To Bottom")',
)
for script in scripts:
    browser.execute_script(script)
获取节点信息
获取属性
# Read an attribute value from a located element.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = 'https://bcxc.net/'
browser.get(url)
# find_element(By...) replaces the find_element_by_* helpers removed in
# Selenium 4.3.
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
# get_attribute(name) returns the value of the named attribute.
print(logo.get_attribute('src'))
获取节点信息
获取文本值
# Read the text of a located element.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = 'https://bcxc.net/'
browser.get(url)
# Named logo (not `input`) so the builtin input() is not shadowed;
# find_element(By...) replaces the find_element_by_* helpers removed in
# Selenium 4.3.
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo.text)
获取节点信息
获取 ID、位置、标签名、大小
# Print the id, location, tag name and size of a located element.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = 'https://bcxc.net/'
browser.get(url)
# Named logo (not `input`) so the builtin input() is not shadowed;
# find_element(By...) replaces the find_element_by_* helpers removed in
# Selenium 4.3.
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo.id)
print(logo.location)
print(logo.tag_name)
print(logo.size)
切换 Frame
# Look for the "logo" element inside a child frame, then in its parent frame.
import time

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
# Enter the frame named "iframeResult"; lookups now search inside it.
browser.switch_to.frame('iframeResult')
# The try/except bodies must be indented for the snippet to parse.
# find_element(By...) replaces the find_element_by_* helpers removed in
# Selenium 4.3.
try:
    logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
    print('NO LOGO')
# Go back to the parent frame and repeat the lookup there.
browser.switch_to.parent_frame()
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)
延时等待之隐式等待
使用隐式等待时,如果要查找的节点没有立即出现,Selenium 会等待一段时间再查找 DOM;默认等待时间是 0
# Implicit wait: element lookups keep retrying for up to the given number
# of seconds before giving up.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.implicitly_wait(10)
url = 'https://bcxc.net/'
# Load the page; without this call the lookup below runs against a blank
# session and `url` is never used.
browser.get(url)
# Named logo (not `input`) so the builtin input() is not shadowed;
# find_element(By...) replaces the find_element_by_* helpers removed in
# Selenium 4.3.
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
延时等待之显式等待
显式等待会等待指定的条件成立;如果超出指定时间条件仍未满足,就会抛出异常
# Explicit wait: block until a given condition holds, up to a timeout.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

browser = webdriver.Chrome()
browser.get('https://www.taobao.com/')
# Wait at most 10 seconds for each condition.
wait = WebDriverWait(browser, 10)
# Named search_box (not `input`) so the builtin input() is not shadowed.
# Wait until the element with id "q" is present in the DOM.
search_box = wait.until(EC.presence_of_element_located((By.ID, 'q')))
# Wait until the element matching ".btn-search" is clickable.
button = wait.until(
    EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search'))
)
print(search_box, button)
Cookies
# Read, add and delete cookies, printing the cookie list after each step.
from selenium import webdriver

browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')

# Cookies present right after loading the page.
print(browser.get_cookies())

# Add one cookie and list the cookies again.
extra_cookie = {
    'name': 'name',
    'domain': 'www.zhihu.com',
    'value': 'germey',
}
browser.add_cookie(extra_cookie)
print(browser.get_cookies())

# Remove all cookies and list them a final time.
browser.delete_all_cookies()
print(browser.get_cookies())
反屏蔽
# Start headless Chrome with options intended to make automation harder to
# detect, then open the target page.
from selenium import webdriver
from selenium.webdriver import ChromeOptions

chrome_options = ChromeOptions()
# Run without a visible window (headless mode).
chrome_options.add_argument('--headless')
# Set the browser window size. Chrome's window-size switch takes
# "width,height"; the previous "1920x3000" form is not that format.
chrome_options.add_argument('window-size=1920,3000')
# Per the original note, Google's documentation mentions this flag as a
# workaround for a bug.
chrome_options.add_argument('--disable-gpu')
# Do not load images, to speed things up.
chrome_options.add_argument('blink-settings=imagesEnabled=false')
# Exclude the "enable-automation" switch via the experimental
# excludeSwitches option.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Disable the notice that Chrome is being controlled by automated test
# software.
chrome_options.add_experimental_option("useAutomationExtension", False)
browser = webdriver.Chrome(options=chrome_options)
# Register a script that runs on every new document and makes
# navigator.webdriver return undefined, to avoid Selenium detection.
browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {'source':'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'})
browser.get('https://antispider1.scrape.cuiqingcai.com/')