精易论坛
标题:
爬取百度图片
[打印本页]
作者:
ideologism
时间:
2019-1-5 15:12
标题:
爬取百度图片
import
re
import
requests
from
requests.exceptions
import
RequestException
from
multiprocessing
import
Pool
# Search keyword: inserted into the Baidu image-search URL and used as the
# prefix of every saved file name.
word = '油菜'
def dowmloadPic(i):
    """Download the images listed on the Baidu search page for ``word``.

    Requests the "flip" search-results page for the module-level keyword
    ``word``, extracts every ``objURL`` value from the returned HTML with a
    regular expression, and saves each image as ``images/<word>_<n>.jpg``.

    Args:
        i: Number used in the name of the first saved file. It is
            incremented after every image that is written successfully.

    Returns:
        None. Pages or images that cannot be fetched are reported on
        stdout and skipped.

    NOTE(review): the search URL does not depend on ``i``, so every call
    requests the same results page, and calls with nearby ``i`` values write
    overlapping file names. Confirm whether ``i`` was meant to select a
    results page.
    """
    import os  # function-scope import so the file's import block stays as-is

    url = ('http://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word='
           + word + '&ct=201326592&v=flip')
    try:
        # A timeout keeps a stalled connection from hanging the worker.
        result = requests.get(url, timeout=10)
    except RequestException:
        print('【错误】搜索页面无法访问')
        return

    pic_urls = re.findall('"objURL":"(.*?)",', result.text, re.S)
    print('找到关键词:' + word + '的图片,现在开始下载图片...')

    # open() below fails if the target directory is missing.
    os.makedirs('images', exist_ok=True)

    for each in pic_urls:
        print('正在下载第' + str(i) + '张图片,图片地址:' + str(each))
        try:
            pic = requests.get(each, timeout=10)
            # HTTPError is a RequestException, so error pages are skipped
            # instead of being saved with a .jpg extension.
            pic.raise_for_status()
        except RequestException:
            print('【错误】当前图片无法下载')
            continue

        path = 'images/' + word + '_' + str(i) + '.jpg'
        # The context manager closes the file even if the write fails.
        with open(path, 'wb') as fp:
            fp.write(pic.content)
        i += 1
if __name__ == '__main__':
    # Run dowmloadPic for every start index 0..599 across a pool of worker
    # processes. map() blocks until all tasks finish, and the context manager
    # then shuts the pool down instead of leaving it to interpreter exit.
    with Pool() as pool:
        pool.map(dowmloadPic, range(600))
作者:
等闲之辈
时间:
2023-11-27 13:47
感谢分享~
欢迎光临 精易论坛 (https://125.confly.eu.org/)
Powered by Discuz! X3.4