一、介绍
原理为使用selenium驱动chrome打开一个新的进程并打开数组中的网址,之后程序自动输入我们事先填入的账号密码,通过已实现的验证码识别模块填写验证码进行登录。登录完成后自动切换页面,进行下一个页面的登录。
二、准备
- 部署环境:win10
- 开发环境:python2.7
- chrome版本89.0.4389.128
三、实践
3.1 下载驱动
- 在设置中查看chrome版本
- 下载对应版本的chromedriver
- 解压后,将chromedriver.exe分别放进chrome浏览器目录 和 Python根目录
- chrome浏览器目录(如:C:\Program Files (x86)\Google\Chrome\Application)
- Python根目录(如:D:\Python\Python37)
3.2 安装python依赖
- pip install pillow
- pip install selenium
3.3 编写程序
batchlogin.py
#coding=UTF-8
import time
import os
from selenium import webdriver
from selenium.common.exceptions import NoAlertPresentException
from pytesser import *
from PIL import Image
from PIL import ImageEnhance
from PIL import ImageFilter
import traceback
# Grey levels below this cut-off map to 0; every other level maps to 1.
threshold = 140
# 256-entry lookup table (one entry per 8-bit grey level) used to binarise
# the greyscale captcha image.
table = [0 if level < threshold else 1 for level in range(256)]
# Letter-to-digit substitutions applied to the upper-cased OCR text.
# NOTE(review): 'S' is replaced by '8' here -- confirm that '5' was not intended.
rep = dict(O='0', I='1', L='1', Z='2', S='8')
def getverify1(name):
    """Recognise the captcha text contained in the image *name*.

    The image is converted to greyscale, binarised through the module-level
    ``table`` lookup, handed to ``image_to_string`` and the recognised text
    is then normalised: surrounding whitespace is stripped, the text is
    upper-cased and every key of ``rep`` is replaced by its value.

    Args:
        name: Image to read; passed unchanged to ``Image.open``.

    Returns:
        The corrected captcha text. The same text is also printed.
    """
    im = Image.open(name)
    # Convert to a greyscale image.
    imgry = im.convert('L')
    # Binarise by thresholding; the split point is baked into ``table``.
    out = imgry.point(table, '1')
    # Recognise, then normalise the raw result.
    text = image_to_string(out).strip().upper()
    for wrong, right in rep.items():
        text = text.replace(wrong, right)
    # Single-argument call form prints the same under Python 2 and Python 3.
    print(text)
    return text
def getVCode(driver):
    """Screenshot the current page, crop out the captcha and return its text.

    Args:
        driver: Selenium WebDriver whose current page contains an element
            with id ``randImage`` (the captcha image).

    Returns:
        The text returned by ``getverify1`` for the cropped captcha.

    Side effects:
        Writes ``page.png`` (full screenshot) and ``yzm.png`` (cropped
        captcha) using relative paths, overwriting any existing files.
    """
    # Save the page currently shown in the browser.
    driver.save_screenshot("page.png")
    # Locate the captcha element on the page (XPath lookup).
    vcode = driver.find_element_by_xpath("//*[@id='randImage']")
    # Bounding box of the captcha; 5 px is trimmed by hand from the left and
    # right edges for a tighter crop.
    loc = vcode.location
    size = vcode.size
    left = loc['x'] + 5
    top = loc['y']
    right = loc['x'] + size['width'] - 5
    bottom = loc['y'] + size['height']
    # Crop the captcha out of the screenshot; crop() takes a
    # (left, top, right, bottom) tuple.
    page_pic = Image.open('page.png')
    v_code_pic = page_pic.crop((left, top, right, bottom))
    # getverify1 opens its argument with Image.open, so the crop is saved to
    # disk and passed by file name.
    v_code_pic.save('yzm.png')
    return getverify1('yzm.png')
def login(username,password,driver):
    """Fill in the login form on the driver's current page and submit it.

    Args:
        username: Account name typed into the ``user_name`` field.
        password: Password typed into the ``user_password`` field.
        driver: Selenium WebDriver positioned on the login page.

    The captcha is read (via ``getVCode``) before any field is touched, then
    each field is clicked, cleared and filled in turn, and finally the
    element with id ``SubmitButton`` is clicked.
    """
    v_code = getVCode(driver)
    fields = (
        ('user_name', username),      # user name input
        ('user_password', password),  # password input
        ('v_code', v_code),           # captcha input
    )
    for field_id, value in fields:
        # Look the element up once and reuse it for all three actions.
        field = driver.find_element_by_id(field_id)
        field.click()
        field.clear()
        field.send_keys(value)
    driver.find_element_by_xpath('//*[@id="SubmitButton"]').click()


# open.py
#coding=UTF-8
from selenium import webdriver
from batchlogin import *
# Start one Chrome session; every login page is opened as a new window in it.
driver = webdriver.Chrome()
# Login pages to visit, in order.
urls = [
    'http://www.test.com:6086/QX_SPD_B2B/',
    'http://www.test.com:6086/QX_SPD_B2B/',
]
for index, url in enumerate(urls):
    # Open the URL in a new browser window.
    driver.execute_script("window.open('" + url + "')")
    # `time` is available here through `from batchlogin import *`.
    time.sleep(1)
    # Fetch the current window handles; index 0 is the initial blank window,
    # so the window opened for this URL is expected at index + 1.
    windows = driver.window_handles
    # switch_to.window replaces the deprecated switch_to_window.
    driver.switch_to.window(windows[index + 1])
    login("USERNAME", "PASSWORD", driver)
    # For debugging, the session cookies can be inspected with
    # driver.get_cookies().
    # Pause to work around pages that do not load correctly.
    time.sleep(2)
# Keep the logged-in windows open for a while before closing.
time.sleep(100)
driver.close()
3.4 优化
在图片保存那块直接读取网站的图片并且不保存直接识别。