• 编写爬虫获取英雄联盟所有英雄的全部皮肤
  • 需要库:
    • requests
    • re
    • json
    • os
  • 效果:在该代码文件目录下生成n个Hero文件夹保存皮肤图片。
"""
Date: 2019.11.7
Author: Justlovesmile
Language: python3
"""
import requests
import re
import json
import os

def getHtml(url):
    """Fetch *url* and hand the response text to getInfo().

    On any network/HTTP failure a message is printed and the function
    returns without raising, matching the script's best-effort style.
    """
    try:
        # timeout keeps a stalled connection from hanging the crawl forever
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
    except requests.RequestException:
        # narrow except: only request errors, not KeyboardInterrupt/SystemExit
        print(url + "爬取失败!")
    else:
        getInfo(r.text)

def getInfo(res):
    """Parse the champion.js payload and download skins for every hero.

    res: raw JS text expected to contain a '"keys":{...},"data"' fragment
    mapping numeric hero ids to hero key names.
    """
    matches = re.findall(r'"keys":(.*?),"data"', res)
    if not matches:
        # fix: original indexed matches[0] unconditionally and raised
        # IndexError when the payload layout changed or an error page came back
        return
    hero_ids = json.loads(matches[0])
    for hero in hero_ids.values():
        getSkin(hero)

def getSkin(hero):
    """Download every skin image for one hero, identified by its key name."""
    url = 'https://lol.qq.com/biz/hero/' + hero + '.js'
    try:
        # timeout keeps a stalled connection from hanging the crawl forever
        r = requests.get(url, timeout=10)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
    except requests.RequestException:
        # narrow except: only request errors, not KeyboardInterrupt/SystemExit
        print(url + "爬取失败!")
    else:
        # each 4-6 digit id names one skin image on the CDN
        skin_ids = re.findall(r'"id":"(\d{4,6})","num"', r.text)
        # idiom fix: iterate values directly instead of range(len(...))
        for skin_id in skin_ids:
            img_url = ('https://game.gtimg.cn/images/lol/act/img/skin/big'
                       + skin_id + '.jpg')
            save_img(hero, img_url)

def save_img(hero, img_url):
    """Save one skin image into a directory named after the hero.

    Skips the download when the target file already exists; prints a
    status message for every outcome instead of raising.
    """
    root = hero
    # fix: original hard-coded the Windows '\\' separator, which produced
    # files literally named 'Hero\xxx.jpg' on POSIX systems
    path = os.path.join(root, img_url.split('/')[-1])
    try:
        # exist_ok avoids the check-then-create race of the original
        os.makedirs(root, exist_ok=True)
        if not os.path.exists(path):
            r = requests.get(img_url, timeout=10)
            r.raise_for_status()
            # 'with' closes the file; the original's extra f.close() was redundant
            with open(path, 'wb') as f:
                f.write(r.content)
            print("文件保存成功!")
        else:
            print("文件已存在!")
    except (requests.RequestException, OSError):
        # narrow except: request or filesystem errors only
        print("爬取失败!")
    else:
        # fix: original printed the '已下载' message even after a failure
        print(img_url + "已下载")

def main():
    """Entry point: crawl the champion index and download all hero skins."""
    getHtml("https://lol.qq.com/biz/hero/champion.js")


if __name__ == "__main__":
    main()