精易论坛

标题: Python自学记录--验证码识别模拟登录 [打印本页]

作者: 菠萝儿    时间: 2021-10-2 20:12
标题: Python自学记录--验证码识别模拟登录
  1. #!/usr/bin/env python
  2. # coding:utf-8

  3. import requests
  4. from hashlib import md5
  5. from lxml import etree
  6. import time

  7. t = int(round(time.time() * 1000)) # current Unix time in milliseconds; appended further down as the "t" query value of the captcha image URL

  8. class Chaojiying_Client(object):

  9.     def __init__(self, username, password, soft_id):
  10.         self.username = username
  11.         password =  password.encode('utf8')
  12.         self.password = md5(password).hexdigest()
  13.         self.soft_id = soft_id
  14.         self.base_params = {
  15.             'user': self.username,
  16.             'pass2': self.password,
  17.             'softid': self.soft_id,
  18.         }
  19.         self.headers = {
  20.             'Connection': 'Keep-Alive',
  21.             'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0)',
  22.         }

  23.     def PostPic(self, im, codetype):
  24.         """
  25.         im: 图片字节
  26.         codetype: 题目类型 参考 http://www.chaojiying.com/price.html
  27.         """
  28.         params = {
  29.             'codetype': codetype,
  30.         }
  31.         params.update(self.base_params)
  32.         files = {'userfile': ('ccc.jpg', im)}
  33.         r = requests.post('http://upload.chaojiying.net/Upload/Processing.php', data=params, files=files, headers=self.headers)
  34.         return r.json()

  35.     def ReportError(self, im_id):
  36.         """
  37.         im_id:报错题目的图片ID
  38.         """
  39.         params = {
  40.             'id': im_id,
  41.         }
  42.         params.update(self.base_params)
  43.         r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php', data=params, headers=self.headers)
  44.         return r.json()



  45. def tranformImgCode(imgPath,imgType):
  46.     chaojiying = Chaojiying_Client('超级鹰账号', '超级鹰密码', '软件ID')   
  47.     im = open(imgPath, 'rb').read()                                                
  48.     return chaojiying.PostPic(im,imgType)['pic_str']

  49. # print(tranformImgCode(r'D:\代码保存\超级鹰_Python\a.jpg',1902))

  50. session = requests.Session() #维持会话,可以让我们在跨请求时保存某些参数

  51. headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
  52. #识别验证码
  53. url = 'https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx'

  54. page_text = session.get(url=url,headers=headers).text

  55. #解析验证码图片地址
  56. tree = etree.HTML(page_text)

  57. img_src = 'https://so.gushiwen.cn'+tree.xpath('//img[@id="imgCode"]/@src')[0]+'?t='+str(t)
  58. print(img_src)


  59. #将验证码保存到本地
  60. img_data= session.get(img_src,headers=headers).content

  61. with open(r'D:\代码保存\a.jpg','wb') as fp:
  62.     fp.write(img_data)

  63. #识别验证码
  64. code_text = tranformImgCode(r'D:\代码保存\a.jpg',1902)
  65. print(code_text)

  66. login_url = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'

  67. data = {
  68.     '__VIEWSTATE': 'uoQIcMK9IobQkeY9MVAmtKoq33r3OLASfUdWmTdVKWkPtsNNpQMuqH92Trfdy05CDl1WFTmp5wys9JzcE+3wziAytRHxPGrKiZP9/29sMhAoT3AIsd0WDlhIqoU=',
  69.     '__VIEWSTATEGENERATOR': 'C93BE1AE',
  70.     'from': 'http://so.gushiwen.cn/user/collect.aspx',
  71.     'email': '网站账号',
  72.     'pwd': '网站密码',
  73.     'code': code_text,
  74.     'denglu': '登录',
  75. }

  76. #点击登陆按钮发起请求,获取了登陆成功后对应的页码源码数据
  77. page_text_login = session.post(url=login_url,headers=headers,data=data).text


  78. with open(r'D:\代码保存\gushiwen.html','w',encoding='utf-8') as fp:
  79.     fp.write(page_text_login)
复制代码



作者: weigar    时间: 2021-10-3 00:14
加油!易起学习努力




欢迎光临 精易论坛 (https://125.confly.eu.org/) Powered by Discuz! X3.4