|
"""Fetch the Douban movie chart and print rank, title and rating per entry."""
import json
import re
import urllib.request

# Browser-like User-Agent header sent with the request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 "
        "(Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/86.0.4240.198 "
        "Safari/537.36"
    )
}

# Ranking endpoint; the requested entry count is appended as the `limit` value.
CHART_URL = (
    "https://movie.douban.com/j/chart/top_list"
    "?type=11&interval_id=100%3A90&action=&start=0&limit="
)

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# Only plain ASCII digits are accepted, so nothing else can leak into the URL.
_DIGITS = re.compile(r"[0-9]+")


def parse_count(text):
    """Convert the user's answer into the number of entries to request.

    Args:
        text: Raw string typed by the user; surrounding whitespace is ignored.

    Returns:
        The requested count as a non-negative int.

    Raises:
        ValueError: If ``text`` is not made up solely of ASCII digits.
    """
    cleaned = text.strip()
    if not _DIGITS.fullmatch(cleaned):
        raise ValueError("expected a whole number, got {!r}".format(text))
    return int(cleaned)


def build_url(count):
    """Return the chart URL asking for ``count`` entries."""
    return CHART_URL + str(count)


def fetch_chart(count):
    """Download the chart and return the response body as text.

    Args:
        count: Number of entries to request.

    Returns:
        The response body decoded as UTF-8.
    """
    request = urllib.request.Request(build_url(count), headers=headers)
    # The context manager closes the connection even if reading fails.
    with urllib.request.urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        return response.read().decode("utf-8")


def parse_ranking(payload):
    """Extract ``(title, rating)`` pairs from the chart's JSON text.

    The payload is decoded with a real JSON parser so that escaped quotes and
    ``\\uXXXX`` sequences in titles are handled, and so that each title stays
    paired with the rating from the same entry.

    Args:
        payload: JSON text holding a list of objects, each expected to carry a
            ``"title"`` string and a ``"rating"`` list whose first item is the
            score.

    Returns:
        A list of ``(title, rating)`` tuples in payload order. Entries that
        lack a title or a rating are skipped.

    Raises:
        ValueError: If ``payload`` is not valid JSON or is not a JSON list.
    """
    entries = json.loads(payload)
    if not isinstance(entries, list):
        raise ValueError("unexpected response: top-level JSON is not a list")

    ranking = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        title = entry.get("title")
        rating = entry.get("rating")
        if title is None or not isinstance(rating, list) or not rating:
            continue
        ranking.append((title, rating[0]))
    return ranking


def main():
    """Prompt for a count, fetch the chart and print one line per movie."""
    try:
        count = parse_count(input("请输入您要cha询的数量:"))
    except ValueError as err:
        # Invalid input is a user error: exit with a message, not a traceback.
        raise SystemExit(str(err))

    ranking = parse_ranking(fetch_chart(count))
    for position, (title, rating) in enumerate(ranking, start=1):
        print("排名:", position, "电影名:", title, "豆瓣评分:", rating)


if __name__ == "__main__":
    main()
-
复制代码
|
|