精易论坛
标题:
绝地战绩数据抓取
[打印本页]
作者:
andwp
时间:
2018-1-24 22:21
标题:
绝地战绩数据抓取
初学python,拿绝地的战绩查询练练手
通过对地址 https://pubgtracker.com/profile/pc/Xinyibaby-Aini?region=as
的抓包分析,逆向出数据的获取流程,最后用python模拟拿到数据。
代码写得烂,贴出来,欢迎找茬。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# 文件名:bgload.py
# 作者:andwp QQ:0x709933
# 使用说明:python 2.7.10 绝地战绩数据抓取,仅解析部分数据
# > python
# >> import bgload
# >> jsobj = bgload.seach('name', 'as')
#
import requests
import re
import sys
import time
import urllib
import json
def runreq(url):
    """Fetch a challenge-protected profile page and parse the player data.

    Two GET requests are issued on one ``requests.Session`` so that cookies
    from the first response are re-sent with the second:

    1. ``url`` is requested and its HTML is passed to ``urlextract`` to build
       the challenge-answer URL.
    2. After a 3 second pause that URL is requested (redirects followed) and
       the resulting HTML is passed to ``jsonextract``.

    :param url: Full profile URL (see ``seach`` for how it is built).
    :return: Whatever ``jsonextract`` returns for the final page.
    """
    user_agent = ('Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/60.0.3112.20 Safari/537.36')
    # The original spelled these header names "Accpet-*", so they were sent
    # as unknown headers and had no effect.  "br" is intentionally not
    # advertised: requests can only decode brotli when an optional package is
    # installed, and an undecodable body would break the regex parsing.
    first_headers = {
        'User-Agent': user_agent,
        'Host': 'pubgtracker.com',
        'Referer': url,
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-Hans-CN, zh-Hans; q=0.5',
        'Upgrade-Insecure-Requests': '1',
    }
    second_headers = dict(first_headers)
    second_headers['Accept'] = 'text/html, application/xhtml+xml, image/jxr, */*'

    # Without a timeout requests may block forever on a stalled connection.
    timeout_seconds = 30

    session = requests.Session()
    try:
        challenge_page = session.get(url, headers=first_headers,
                                     timeout=timeout_seconds)
        cmpurl = urlextract(url, challenge_page.text)

        print('wait 4 load %s' % (cmpurl,))
        # NOTE(review): presumably the server rejects answers that arrive
        # too quickly after the challenge page -- confirm the required delay.
        time.sleep(3)

        final_page = session.get(cmpurl, headers=second_headers,
                                 allow_redirects=True,
                                 timeout=timeout_seconds)
        print(final_page.status_code)
        return jsonextract(final_page.text)
    finally:
        # Release pooled connections even when a request or parser raises.
        session.close()
def urlextract(oriurl, context):
    """Build the URL that answers the JavaScript challenge found in ``context``.

    The challenge page is expected to contain an obfuscated arithmetic script
    of the form ``obj={"key":EXPR};`` followed by ``obj.key+=EXPR;`` /
    ``-=`` / ``*=`` updates, plus a form with ``action``, ``jschl_vc`` and
    ``pass`` values.  (NOTE(review): this looks like a Cloudflare-style
    "jschl" challenge -- confirm against a live page.)

    Steps:

    1. Pull the initial assignment and the update statements out of the page.
    2. Rewrite the obfuscated JavaScript tokens into plain integer arithmetic
       (``!+[]``/``!![]`` -> ``1``, ``[]`` -> ``0``, ``+((a)+(b))`` ->
       ``(10*(a)+(b))``) and turn ``obj.key`` into the identifier
       ``objtmpkey``.
    3. Evaluate the statements, then add the length of the host part of
       ``oriurl`` to the result.
    4. Assemble ``<origin><action>?jschl_vc=..&pass=..&jschl_answer=..``.

    :param oriurl: URL that returned the challenge page; only its scheme and
        host are used.
    :param context: HTML text of the challenge page.
    :return: The challenge-answer URL as a string.
    :raises IndexError: if the script or one of the form fields is missing.
    :raises AttributeError: if ``oriurl`` is not an http(s) URL.
    """
    try:  # Python 2
        from urllib import urlencode
    except ImportError:  # Python 3
        from urllib.parse import urlencode

    seed_matches = re.findall(r'[A-Za-z]+\=\{"[A-Za-z]+"\:\W+\}\;', context)
    update_matches = re.findall(r'[A-Za-z]+\.[A-Za-z]+[\+\-\*]\=\W+\;', context)

    # obj={"key":EXPR};  ->  obj.key=EXPR;
    seed = seed_matches[0].replace('={"', '.').replace('":', '=').replace('}', '')

    # Order matters: the longer tokens must be rewritten before the bare "[]".
    rewrites = (
        ('!+[]', '1'),
        ('!![]', '1'),
        ('[]', '0'),
        ('+((', '(10*('),   # "+((a)+(b))" concatenates two digits in JS
        ('(+', '('),
        ('.', 'tmp'),       # obj.key -> objtmpkey, a valid Python name
    )
    statements = []
    for js_stmt in [seed] + update_matches:
        for old, new in rewrites:
            js_stmt = js_stmt.replace(old, new)
        statements.append(js_stmt)

    var_name = re.search(r'[A-Za-z]+', statements[0]).group(0)

    # Scheme + host.  The original pattern (\w+\.\w+) cut off sub-domains and
    # hyphenated hosts, and "len - 8" assumed the scheme was always https://.
    origin = re.match(r'https?://[^/?#]+', oriurl).group(0)
    host_length = len(origin.split('://', 1)[1])
    statements.append('%s+=%d' % (var_name, host_length))

    # SECURITY: the statements below are derived from a remote page.  The
    # extraction regexes only admit non-word characters in the expressions,
    # so after rewriting they should hold nothing but digits and operators,
    # but this is still exec() on external data.  It runs in a throw-away
    # namespace with no builtins instead of the function's own locals (which
    # is also unreliable on Python 3, where locals() may be a snapshot).
    namespace = {}
    for statement in statements:
        exec(statement, {'__builtins__': {}}, namespace)
    answer = namespace[var_name]

    jschl_vc = re.findall(r'jschl_vc"[^"]+"([^"]+)"', context)[0]
    passval = re.findall(r'pass"[^"]+"([^"]+)"', context)[0]
    action = re.findall(r'action\="([^"]+)"', context)[0]

    encoded_pass = urlencode({'pass': passval})
    return '%s%s?jschl_vc=%s&%s&jschl_answer=%s' % (
        origin, action, jschl_vc, encoded_pass, answer)
def jsonextract(html):
    """Extract the ``playerData`` JSON object from ``html`` and print a summary.

    The page is searched for ``var playerData = {...}``.  When found, the
    JSON is decoded and the player name, Steam ID and every stats entry
    (last entry first) are printed to stdout.

    NOTE: the pattern only accepts a fixed set of characters inside the
    object, so a payload containing anything else (for example ``+``, ``(``
    or non-ASCII text) is not matched and the function falls back to
    returning ``html``.

    :param html: HTML text of the profile page.
    :return: The raw JSON text when ``playerData`` is found, otherwise
        ``html`` unchanged.
    :raises ValueError: if the captured text is not valid JSON.
    :raises KeyError: if an expected key is missing from the decoded data.
    """
    matches = re.findall(
        r'var playerData[ |\=]+(\{"[\/|\.|\w|\d|\{|\}|\'|"|\[|\]|\;|\,|\:| |\-|\%]+\})',
        html)
    if not matches:
        return html

    raw_json = matches[0]
    player = json.loads(raw_json)
    print('Player name:%s; SteamID:%s' % (player['PlayerName'], player['SteamId']))

    # "Stats" may be null in the payload; treat that like an empty list.
    for entry in reversed(player['Stats'] or []):
        print('Region:%s ; Season:%s; Match:%s' % (
            entry['Region'], entry['Season'], entry['Match']))
        for info in entry['Stats'] or []:
            print('%s:%s\tcategory:%s' % (
                info['field'], info['displayValue'], info['category']))
    return raw_json
def seach(name, region):
    """Run a stats lookup for one player.

    Builds the pubgtracker profile URL for ``name`` in ``region`` and hands
    it to ``runreq``.

    :param name: Player ID as used in the profile URL.
    :param region: Region code.  The original note lists: Asia ``AS``,
        Europe ``EU``, ``NA`` (described there as South America;
        NOTE(review): NA more commonly means North America -- confirm),
        Oceania ``OC``, Korea/Japan ``JPKR``, South-East Asia ``SEA``,
        all regions aggregated ``AGG``.
    :return: The value returned by ``runreq`` for that URL.
    """
    profile_url = 'https://pubgtracker.com/profile/pc/%s?region=%s' % (name, region)
    return runreq(profile_url)
if __name__ == "__main__":
    # Manual smoke test: look up one sample player in the Asia region.
    seach('Xinyibaby-Aini', 'as')
复制代码
运行结果:
steamseach.png
(82.74 KB, 下载次数: 0)
下载附件
输出
2018-1-24 22:16 上传
作者:
Awesome·LYG
时间:
2018-1-25 00:28
这个网站应该不用抓包吧?别人有api说明呢
作者:
q374702102
时间:
2018-1-25 00:40
感谢分享,支持楼主!
作者:
1150129654
时间:
2022-7-30 09:19
666666666666666666666
欢迎光临 精易论坛 (https://125.confly.eu.org/)
Powered by Discuz! X3.4