本文共 3435 字,大约阅读时间需要 11 分钟。
基于selenium模拟登录淘宝读取cookie
# Step 1: log in to Taobao through a Selenium-driven Chrome and save the
# resulting session cookies to 'taobbao.txt' as JSON.
import json
import time

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
# Simulated mouse operations
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
# Keyboard key operations
from selenium.webdriver.common.keys import Keys

from steting import username, password

options = webdriver.ChromeOptions()
user_ag = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
options.add_argument('user-agent=%s' % user_ag)
# No executable_path argument: "chromedriver" on PATH is already the default
# in Selenium 3, and the argument no longer exists in recent Selenium 4.
driver = webdriver.Chrome(options=options)

try:
    # Open the Taobao login page
    driver.get("https://login.taobao.com/member/login.jhtml?spm=a21bo.21814703.754894437.1.5af911d9tBuTtn&f=top&redirectURL=https%3A%2F%2Fwww.taobao.com%2F")

    # Redefine navigator.webdriver so that page scripts reading it get false
    script = "Object.defineProperty(navigator,'webdriver',{get: ()=> false,});"
    driver.execute_script(script)
    time.sleep(2)

    # Fill in the credentials, pausing between steps
    driver.find_element(By.NAME, 'fm-login-id').send_keys(username)
    time.sleep(2)
    driver.find_element(By.NAME, 'fm-login-password').send_keys(password)
    time.sleep(2)

    # The slider captcha is handled on a best-effort basis: if it is absent or
    # the drag fails, report it and still attempt to submit the form.
    try:
        # Locate the slider
        slider = driver.find_element(By.XPATH, "//span[contains(@class, 'btn_slide')]")
        # Only drag it when it is actually visible
        if slider.is_displayed():
            # Press the mouse button on the slider and keep holding it
            ActionChains(driver).click_and_hold(on_element=slider).perform()
            # Drag 258 pixels to the right
            ActionChains(driver).move_by_offset(xoffset=258, yoffset=0).perform()
            # Release the mouse button after a short pause
            ActionChains(driver).pause(0.5).release().perform()
    except WebDriverException as exc:
        print('slider captcha skipped: %s' % exc)

    time.sleep(2)
    driver.find_element(By.XPATH, '//*[@id="login-form"]/div[4]/button').click()
    # Give the login redirect time to complete before reading the cookies
    time.sleep(10)

    # Fetch the site cookies and persist them as JSON
    dricookie = driver.get_cookies()
    with open('taobbao.txt', 'w', encoding='utf-8') as fw:
        json.dump(dricookie, fw)
finally:
    # Always shut down the browser and the chromedriver process
    driver.quit()
# steting.py -- account credentials imported by the login script.
# Replace the placeholders ("your account" / "your password") with real values.
username = '你的账号'
password = '你的密码'
# Reuse the cookies saved in 'taobbao.txt' to open a logged-in Taobao session,
# run a keyword search and print name, price, shop and location of each item.
import json
import time
from urllib.parse import quote

from bs4 import BeautifulSoup
from selenium import webdriver
# Simulated mouse operations
from selenium.webdriver import ActionChains
# Keyboard key operations
from selenium.webdriver.common.keys import Keys


def _node_text(node):
    """Return the text of a BeautifulSoup node, or '' when the node is None."""
    return node.text if node is not None else ''


option = webdriver.ChromeOptions()
option.add_argument('--start-maximized')
driver = webdriver.Chrome(options=option)

try:
    # The site must be open before add_cookie() is called for it
    driver.get('https://www.taobao.com')

    with open('taobbao.txt', 'r', encoding='utf-8') as fr:
        coojies = json.load(fr)
    for cookie in coojies:
        driver.add_cookie(cookie)
    time.sleep(6)

    # After the refresh the session is still logged in
    driver.implicitly_wait(2)
    driver.refresh()

    # Search keyword
    keyword = '男装'
    # Search results page; the keyword is percent-encoded for use in the query
    url = 'https://s.taobao.com/search?q=' + quote(keyword)
    driver.get(url)

    # Parse the rendered page
    soup = BeautifulSoup(driver.page_source, 'lxml')
    items = soup.select('#mainsrp-itemlist .items .item')
    for item in items:
        # Item name; a missing node yields '' instead of raising AttributeError
        title_div = item.find('div', class_='row row-2 title')
        name = _node_text(title_div.a if title_div is not None else None).strip()
        print(name)

        # Price, with the currency sign replaced by a space
        price = _node_text(item.find('div', class_='price')).replace("¥", " ")
        print(price)

        # Shop name
        shop_div = item.find('div', class_='shop')
        da = _node_text(shop_div.a if shop_div is not None else None).strip()
        print(da)

        # Region
        da_qu = _node_text(item.find('div', class_='location')).strip()
        print(da_qu)
finally:
    # Always shut down the browser and the chromedriver process
    driver.quit()
截图如下
#本地会生成一个.txt文件用来保存并读取cookie

![在这里插入图片描述](https://img-blog.csdnimg.cn/20210519105644538.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzQzMjMzNzM3,size_16,color_FFFFFF,t_70#pic_center)
总结:模拟登录是基于cookie保持登录状态的,如果cookie过期了,就要重新登录。
1. 淘宝反爬太强了,最后被限制了。知识点:用了selenium的各个模块,用鼠标操作模拟用户的操作;登录之后把cookie保存为json格式;最后读取txt中的cookie,要先登录下淘宝,再调用 driver.add_cookie(cookie)。fr = open('taobbao.txt','r')
coojies = json.load(fr)。最后刷新页面(刷新后,登录态还在):driver.implicitly_wait(2),driver.refresh()。转载地址:http://loywi.baihongyu.com/