一、介绍
原理为使用selenium驱动chrome打开一个新的进程并打开数组中的网址,之后程序自动输入我们事先填入的账号密码,通过已实现的验证码识别模块填写验证码进行登录。登录完成后自动切换页面,进行下一个页面的登录。
二、准备
- 部署环境:win10
- 开发环境:python2.7
- chrome版本:89.0.4389.128
三、实践
3.1 下载驱动
- 在设置中查看chrome版本
- 下载对应版本的chromedriver
- 解压后,将chromedriver.exe分别放进chrome浏览器目录 和 Python根目录
- chrome浏览器目录(如:C:\Program Files (x86)\Google\Chrome\Application)
- Python根目录(如:D:\Python\Python37)
3.2 安装python依赖
- pip install pillow
- pip install selenium
3.3 编写程序
batchlogin.py
- #coding=UTF-8
- import time
- import os
- from selenium import webdriver
- from selenium.common.exceptions import NoAlertPresentException
- from pytesser import *
- from PIL import Image
- from PIL import ImageEnhance
- from PIL import ImageFilter
- import traceback
-
# Gray levels strictly below this value map to 0 in the lookup table;
# all other levels map to 1.
threshold = 140

# 256-entry lookup table (one entry per 8-bit gray level) used to binarize
# the grayscale captcha image.
table = [0 if level < threshold else 1 for level in range(256)]

# Character substitutions applied to the upper-cased OCR result: each key
# found in the recognized text is replaced by its value.
# NOTE(review): 'S' is mapped to '8' (not '5') -- presumably tuned to this
# site's captcha font; confirm before changing.
rep = {
    'O': '0',
    'I': '1',
    'L': '1',
    'Z': '2',
    'S': '8',
}
-
def getverify1(name):
    """Recognize the captcha text contained in the image *name*.

    The image is opened, converted to grayscale, binarized through the
    module-level ``table`` lookup, and handed to ``image_to_string`` for
    OCR. The raw result is then stripped of surrounding whitespace,
    upper-cased, and every key of the module-level ``rep`` mapping is
    replaced by its value. The corrected text is printed and returned.

    Args:
        name: The captcha image to read; passed straight to ``Image.open``.

    Returns:
        The corrected captcha text.
    """
    im = Image.open(name)
    # Convert to a grayscale image.
    imgry = im.convert('L')
    # Binarize by thresholding: ``table`` maps every gray level to 0 or 1.
    out = imgry.point(table, '1')
    # Run OCR on the binarized image.
    text = image_to_string(out)
    # Normalize, then apply the character substitutions from ``rep``.
    text = text.strip().upper()
    for wrong, right in rep.items():
        text = text.replace(wrong, right)
    # Parenthesized form so the statement is valid on Python 2 and Python 3.
    print(text)
    return text
-
def getVCode(driver):
    """Return the captcha text shown on the page *driver* currently displays.

    A screenshot of the page is saved to ``page.png``; the region occupied
    by the element with id ``randImage`` is cropped out of it, saved as
    ``yzm.png``, and passed to ``getverify1`` for recognition.

    Args:
        driver: The WebDriver object whose current page holds the captcha.

    Returns:
        Whatever ``getverify1`` returns for the cropped captcha image.
    """
    # Capture the whole page first; the captcha is cut out of this file.
    driver.save_screenshot("page.png")
    # Locate the captcha image element by XPath.
    captcha = driver.find_element_by_xpath("//*[@id='randImage']")
    origin, extent = captcha.location, captcha.size
    # Crop box as (left, top, right, bottom). The left and right edges are
    # each pulled in by 5 px -- a manual adjustment for a tighter crop.
    box = (
        origin['x'] + 5,
        origin['y'],
        origin['x'] + extent['width'] - 5,
        origin['y'] + extent['height'],
    )
    Image.open('page.png').crop(box).save('yzm.png')
    return getverify1('yzm.png')
-
def login(username, password, driver):
    """Fill in and submit the login form on the page *driver* is showing.

    The captcha is read first via ``getVCode``. Then the user-name,
    password and captcha inputs are each clicked, cleared and typed into,
    and finally the element with id ``SubmitButton`` is clicked.

    Args:
        username: Account name typed into the ``user_name`` input.
        password: Password typed into the ``user_password`` input.
        driver: The WebDriver object controlling the login page.
    """
    v_code = getVCode(driver)

    # (input element id, text to type) in the order the form is filled in.
    form_fields = (
        ('user_name', username),
        ('user_password', password),
        ('v_code', v_code),
    )
    for field_id, value in form_fields:
        # The element is looked up again before every action.
        driver.find_element_by_id(field_id).click()            # focus the input
        driver.find_element_by_id(field_id).clear()            # empty the input
        driver.find_element_by_id(field_id).send_keys(value)   # type the value

    driver.find_element_by_xpath('//*[@id="SubmitButton"]').click()
open.py
# coding=UTF-8
# Opens every URL in ``urls`` in its own browser window and logs in to each
# one with ``login`` from batchlogin.
import time  # imported explicitly instead of relying on the star import below

from selenium import webdriver
from batchlogin import *

driver = webdriver.Chrome()
urls = [
    'http://www.test.com:6086/QX_SPD_B2B/',
    'http://www.test.com:6086/QX_SPD_B2B/',
]

try:
    for url in urls:
        # Remember which windows exist so the newly opened one can be
        # identified without assuming any ordering of the handle list.
        known_handles = set(driver.window_handles)
        # Pass the URL as a script argument rather than splicing it into
        # the JavaScript source.
        driver.execute_script("window.open(arguments[0])", url)
        time.sleep(1)
        # Switch to the window that was just opened.
        new_handles = [
            handle for handle in driver.window_handles
            if handle not in known_handles
        ]
        driver.switch_to.window(new_handles[0])
        login("USERNAME", "PASSWORD", driver)

        # Pause between logins to work around pages not loading correctly.
        time.sleep(2)

    # Keep the browser open for a while after all logins are done.
    time.sleep(100)
finally:
    # quit() ends the whole session (all windows and the chromedriver
    # process); close() would only close the current window.
    driver.quit()
3.4 优化
在图片保存那块,可以直接读取网站的验证码图片,不保存到本地而直接进行识别。