問題:
用requests下載圖片太麻煩了,雖然urlretrieve可以一步搞定,但是我想統一使用requests庫。注釋部分就是用requests寫的。
from urllib.request import urlretrieve
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Fetch the landing page. The timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() stops early on an HTTP error
# instead of trying to parse an error page.
r = requests.get("http://www.pythonscraping.com", timeout=10)
r.raise_for_status()
# Name the parser explicitly so BeautifulSoup does not have to guess one
# (guessing emits a warning and can pick different parsers on different
# machines).
bs = BeautifulSoup(r.text, "html.parser")
# The logo is the <img> nested inside <a id="logo">. Resolve its src against
# the final page URL so a relative path still yields a downloadable URL.
image = urljoin(r.url, bs.find("a", {"id": "logo"}).find("img")["src"])
urlretrieve(image, "logo.jpg")
# requests-only alternative to urlretrieve (streams the body to disk):
# ir = requests.get(image, stream=True)
# if ir.status_code == 200:
#     with open('logo.jpg', 'wb') as f:
#         for chunk in ir:
#             f.write(chunk)
回答
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Download the page and locate the logo <img> inside <a id="logo">.
# Timeouts prevent the script from blocking forever on a dead connection.
r = requests.get("http://www.pythonscraping.com", timeout=10)
bs = BeautifulSoup(r.text, 'lxml')
# Resolve the src against the page URL so a relative path also works.
image = urljoin(r.url, bs.find("a", {"id": "logo"}).find("img")["src"])
ir = requests.get(image, timeout=10)
# Only write the file when the image request succeeded; ir.content holds the
# whole response body as bytes, so one write is enough.
if ir.status_code == 200:
    # The with-block closes the file handle deterministically.
    with open('logo.jpg', 'wb') as f:
        f.write(ir.content)
或者用正則表達式:
import re, requests
from urllib.parse import urljoin

# Download the page; the timeout prevents hanging on a stalled connection.
r = requests.get("http://www.pythonscraping.com", timeout=10)
# Capture the src attribute of the first <img> that follows <a ... id="logo">.
p = re.compile(r'<a[^>]*?id="logo"[^<]*?<img[^>]*?src="([^"]*)')
m = p.search(r.text)
if m is None:
    # Exit with a readable message rather than an IndexError when the page
    # layout does not contain the expected logo markup.
    raise SystemExit('logo image not found in page')
# Resolve the src against the page URL so a relative path also works.
image = urljoin(r.url, m.group(1))
ir = requests.get(image, timeout=10)
# Do not save an HTTP error page as logo.jpg.
ir.raise_for_status()
# The with-block closes the file; write() returns the number of bytes written.
with open('logo.jpg', 'wb') as f:
    sz = f.write(ir.content)
print('logo.jpg', sz, 'bytes')