一、背景
在1688官网里面有很多信息是需要登录才能看得到的,比如商家的联系电话等等。那么我们在抓取它的网页的时候,肯定是需要维持登录状态才能得到对应的内容。这里面就会涉及到自动登录的问题。
登录地址:https://login.1688.com/member/signin.htm
二、自动登录方法
1、找到对应的元素,账号、密码框。
2、把账号、密码值带进去。
// Load the account pool from 1688Account.json and pick one entry at random.
const string accountFile = "1688Account.json";
if (!File.Exists(accountFile))
{
    throw new ArgumentException("1688Account not found");
}
string accountJson = File.ReadAllText(accountFile, Encoding.UTF8);
var accounts = JsonConvert.DeserializeObject<List<Account1688Item>>(accountJson);
if (accounts == null || accounts.Count == 0)
{
    // Without this guard an empty file surfaces as an opaque null/index error further down.
    throw new ArgumentException("1688Account is empty");
}
string chosen = accounts[new Random().Next(accounts.Count)].AccountPassword;
// The part before '+' is typed into the account box and the part after it into the
// password box. Split on the first '+' only, so a password that itself contains '+'
// is not truncated.
string[] credentials = (chosen ?? string.Empty).Split(new[] { '+' }, 2);
if (credentials.Length != 2)
{
    // Fail before anything is typed into the form.
    throw new ArgumentException("1688Account entry must look like account+password");
}
driver.FindElement(By.Id("fm-login-id")).SendKeys(credentials[0]);
driver.FindElement(By.Id("fm-login-password")).SendKeys(credentials[1]);
3、模拟点击提交按钮操作。
// Press the login button, then pause for 30 seconds before continuing
// (the original note reads "30 ... to operate"; presumably time for the sign-in to complete).
var submitButton = driver.FindElement(By.ClassName("password-login"));
submitButton.Click();
Thread.Sleep(TimeSpan.FromSeconds(30));
4、进入控制台,验证是否登录成功,并记录Cookies。下次再来的时候,直接使用现成的Cookies,一般有效期可以维持一天多左右。
// Open the work.1688.com page; ending up on login.1688.com means the sign-in did not take.
const string workbenchUrl = "https://work.1688.com/?tracelog=login_target_is_blank_1688";
driver.Navigate().GoToUrl(workbenchUrl);
Thread.Sleep(TimeSpan.FromSeconds(5));

string landedOn = driver.Url;
bool bouncedToLogin = landedOn.Contains("login.1688.com");
if (bouncedToLogin)
{
    Console.WriteLine("登录失败");
    CookieHelp.DeleteCookies();
    Console.WriteLine("2");
    throw new Exception("重新登录");
}

// Logged in: reload once, then persist the session cookies for later runs.
driver.Navigate().Refresh();
var sessionCookies = driver.Manage().Cookies.AllCookies;
CookieHelp.WriteCookies(sessionCookies);
5、判断是否有现成的登录cookies。
// Start from an empty cookie jar on www.1688.com, then try to restore a saved session.
driver.Navigate().GoToUrl("https://www.1688.com/");
driver.Manage().Cookies.DeleteAllCookies();
var listCookie = CookieHelp.GetCookie();
if (listCookie != null)
{
    logintry = 0;
    Console.WriteLine("有现成cookies" + DateTime.UtcNow);
    foreach (var item in listCookie)
    {
        driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));
    }
    Thread.Sleep(2000);

    driver.Navigate().GoToUrl("https://work.1688.com/?tracelog=login_target_is_blank_1688");

    Thread.Sleep(1000 * 2);
    // Same check as the complete listing: a redirect to the login host means the saved
    // cookies are no longer accepted, so discard them and signal the caller to log in again.
    if (driver.Url.Contains("login.1688.com"))
    {
        Console.WriteLine("cookies过期了");
        CookieHelp.DeleteCookies();
        Console.WriteLine("1");
        throw new Exception("重新登录");
    }
}
三、完整代码
/// <summary>
/// Puts <paramref name="driver"/> into a logged-in 1688 session.
/// Cookies returned by <c>CookieHelp.GetCookie()</c> are tried first. When there are none,
/// a randomly chosen account from <c>1688Account.json</c> is typed into the login form and,
/// on success, the browser's cookies are saved through <c>CookieHelp.WriteCookies</c>.
/// </summary>
/// <param name="_reptilesImageSearchService">Not used inside this method.</param>
/// <param name="driver">Browser session that should end up logged in.</param>
/// <exception cref="ArgumentException">
/// <c>1688Account.json</c> is missing, contains no accounts, or the chosen entry is not
/// in <c>account+password</c> form.
/// </exception>
/// <exception cref="Exception">
/// Thrown with message "重新登录" when the saved cookies are rejected or the form login
/// fails (saved cookies are deleted first), and with "登陆次数超出,退出" once more than
/// four form logins have been attempted.
/// </exception>
public void Implement(IReptilesImageSearchService _reptilesImageSearchService, IWebDriver driver)
{
    const string workbenchUrl = "https://work.1688.com/?tracelog=login_target_is_blank_1688";
    const string loginHost = "login.1688.com";
    const string accountFile = "1688Account.json";

    // Cookies can only be set for the domain currently loaded, so open the site first
    // and start from an empty cookie jar.
    driver.Navigate().GoToUrl("https://www.1688.com/");
    driver.Manage().Cookies.DeleteAllCookies();

    var listCookie = CookieHelp.GetCookie();
    if (listCookie != null)
    {
        logintry = 0;
        Console.WriteLine("有现成cookies" + DateTime.UtcNow);
        foreach (var item in listCookie)
        {
            driver.Manage().Cookies.AddCookie(new Cookie(item.Name, item.Value, item.Domain, item.Path, item.Expiry));
        }
        Thread.Sleep(2000);

        driver.Navigate().GoToUrl(workbenchUrl);

        Thread.Sleep(1000 * 2);
        // Being redirected to the login host means the saved cookies are no longer accepted.
        if (driver.Url.Contains(loginHost))
        {
            Console.WriteLine("cookies过期了");
            CookieHelp.DeleteCookies();
            Console.WriteLine("1");
            throw new Exception("重新登录");
        }
    }
    else
    {
        if (logintry > 4)
        {
            Console.WriteLine("登陆次数超出:" + logintry);
            throw new Exception("登陆次数超出,退出");
        }
        logintry++;
        Console.WriteLine("无现成cookies" + DateTime.UtcNow);
        driver.Navigate().GoToUrl("https://login.1688.com/member/signin.htm");

        #region 登录动作
        // The form fields are looked up inside the first frame of the login page.
        driver.SwitchTo().Frame(0);

        // Run the stealth script in the current document. Its return value is not needed,
        // so it is neither cast nor stored.
        // NOTE(review): this only affects the document that is already loaded - confirm
        // that this is sufficient for the pages visited afterwards.
        IJavaScriptExecutor js = (IJavaScriptExecutor)driver;
        string stealthScript = File.ReadAllText("stealth.min.js", Encoding.UTF8);
        js.ExecuteScript(stealthScript);

        if (!File.Exists(accountFile))
        {
            throw new ArgumentException("1688Account not found");
        }
        string accountJson = File.ReadAllText(accountFile, Encoding.UTF8);
        var accounts = JsonConvert.DeserializeObject<List<Account1688Item>>(accountJson);
        if (accounts == null || accounts.Count == 0)
        {
            // Without this guard an empty file surfaces as an opaque null/index error.
            throw new ArgumentException("1688Account is empty");
        }

        string chosen = accounts[new Random().Next(accounts.Count)].AccountPassword;
        // Split on the first '+' only, so a password that itself contains '+' stays intact.
        string[] credentials = (chosen ?? string.Empty).Split(new[] { '+' }, 2);
        if (credentials.Length != 2)
        {
            // Fail before anything is typed into the form.
            throw new ArgumentException("1688Account entry must look like account+password");
        }

        driver.FindElement(By.Id("fm-login-id")).SendKeys(credentials[0]);
        driver.FindElement(By.Id("fm-login-password")).SendKeys(credentials[1]);
        driver.FindElement(By.ClassName("password-login")).Click();
        // Pause 30 seconds after submitting before checking the result.
        Thread.Sleep(1000 * 30);
        #endregion 登录动作

        driver.Navigate().GoToUrl(workbenchUrl);
        Thread.Sleep(1000 * 5);
        if (driver.Url.Contains(loginHost))
        {
            Console.WriteLine("登录失败");
            CookieHelp.DeleteCookies();
            Console.WriteLine("2");
            throw new Exception("重新登录");
        }

        // Logged in: reload once, then persist the cookies for later runs.
        driver.Navigate().Refresh();
        CookieHelp.WriteCookies(driver.Manage().Cookies.AllCookies);
    }
    Thread.Sleep(1000);
}
四、注意事项
1、登录的时候,有时候会出现滑块验证码,这时候一般是使用其他账号重试或者是在当前的机器手工登录一次,后面基本就会被信任。
2、使用这段代码本身就是模拟真实用户的行为,最大限度地减少验证码出现的几率。
// Execute stealth.min.js in the document that is currently loaded.
// NOTE(review): this only affects the current document - confirm that this is enough
// for pages navigated to afterwards.
IJavaScriptExecutor js = (IJavaScriptExecutor)driver;
// Minimal alternative that only hides navigator.webdriver:
// js.ExecuteScript("Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});");

string stealthScript = File.ReadAllText("stealth.min.js", Encoding.UTF8);
// The result is not needed; casting it to string could only add an InvalidCastException
// if the script ever returned a non-string value.
js.ExecuteScript(stealthScript);
|