# A small web crawler written in Python.
import uuid
import requests
from bs4 import BeautifulSoup
def download_image(url, filename, timeout=10):
    """Download the resource at ``url`` and write it to ``filename``.

    The body is streamed to disk in 1 KiB chunks. On a non-200 response or
    a network error a message is printed and nothing is raised, so a caller
    looping over many URLs keeps going.

    Args:
        url: Address of the image to fetch.
        filename: Path of the local file to create (opened in ``'wb'`` mode,
            so an existing file is overwritten).
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        None.

    Note:
        If the connection drops mid-transfer, a partially written file may
        be left at ``filename``. Errors from opening or writing the file
        (``OSError``) are not caught here.
    """
    try:
        # With stream=True the connection stays open until the body is
        # consumed or the response is closed; the context manager guarantees
        # it is released on every path, including the non-200 one.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to retrieve image. Status code: {response.status_code}")
                return
            with open(filename, 'wb') as f:
                for chunk in response.iter_content(1024):
                    f.write(chunk)
    except requests.RequestException as exc:
        # Covers timeouts, connection errors and broken chunked transfers.
        print(f"Failed to retrieve image. Error: {exc}")
# Listing pages to crawl: index_52.htm through index_101.htm (50 pages).
FIRST_PAGE = 52
PAGE_COUNT = 50

# Raw string: '\p' and '\l' are not valid escape sequences, so the plain
# literal only worked by accident and triggers a warning on current Python.
SAVE_PREFIX = r'D:\pytest\local_image'

# Seconds to wait for a listing page before skipping it.
PAGE_TIMEOUT = 10

for page in range(FIRST_PAGE, FIRST_PAGE + PAGE_COUNT):
    url = 'http://www.netbian.com/dongman/index_' + str(page) + '.htm'
    try:
        response = requests.get(url, timeout=PAGE_TIMEOUT)
    except requests.RequestException as exc:
        # One unreachable page should not abort the remaining pages.
        print(f"Failed to retrieve the webpage. Error: {exc}")
        continue
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        continue

    soup = BeautifulSoup(response.text, 'lxml')
    for img in soup.find_all('img'):
        # .get() avoids a KeyError on <img> tags that carry no src attribute.
        src = img.get('src')
        if not src or 'small' not in src:
            continue
        # Only URLs containing "small" are processed; that marker is removed
        # before the download.
        src = src.replace("small", "")
        # NOTE(review): removes characters 49..58 of the URL. This relies on
        # the site's exact URL layout (presumably a fixed-length segment that
        # is absent from the full-size image URL) -- confirm against live data.
        src = src[:49] + src[59:]
        # A random UUID keeps file names unique across pages and runs.
        local_filename = SAVE_PREFIX + str(uuid.uuid4()) + '.jpg'
        download_image(src, local_filename)
        print(src)