#coding=utf-8
import re
import urllib.request
from urllib import error

from bs4 import BeautifulSoup
# Category slugs. Nothing in the visible part of this script reads this list;
# presumably these are the site's gallery sections -- TODO confirm before use.
ls = ['zhenrenxiu', 'meinv', 'lianglichemo', 'rentiyishu', 'xiaohua']
def validatetitle(title):
    """Return *title* with characters that are unsafe in file names replaced.

    Each slash, backslash, colon, asterisk, question mark, double quote,
    angle bracket and vertical bar is replaced by a single underscore; every
    other character is kept as-is.

    Args:
        title: The text to sanitise.

    Returns:
        The sanitised string (an empty string stays empty).
    """
    rstr = r'[/\\:*?"<>|]'  # / \ : * ? " < > |
    new_title = re.sub(rstr, '_', title)  # swap each one for an underscore
    return new_title
# Crawl gallery ids 1..59999 and, for every sub-page of each gallery that has
# a "picmainer" element, download the first <img> on the page into D://img2/,
# naming the file after the page's <h1> title.
#
# Failures are reported with the gallery id and the crawl moves on; one broken
# gallery or page never aborts the whole run.

# Both patterns are loop-invariant, so they are compiled once up front.
PAGE_TOTAL_MARKER = re.compile('共')
FIRST_NUMBER = re.compile(r'\d+')

for j in range(1, 60000):
    # NOTE(review): path segment restored in lowercase to match the
    # 'xiaohua' entry of the category list -- confirm against the site.
    url_origin = 'http://www.7160.com/xiaohua/' + str(j)
    try:
        # Close the HTTP response as soon as the document has been parsed.
        with urllib.request.urlopen(url_origin) as page_obj:
            page_soup = BeautifulSoup(page_obj, 'lxml')

        # Locate the text node containing '共' and read the first number in
        # it as the page total. A missing node raises AttributeError here and
        # is reported by the outer handler, same as any other failure.
        total_page_obj = page_soup.find(text=PAGE_TOTAL_MARKER).string
        match = FIRST_NUMBER.search(total_page_obj)
        total_page = int(match.group()) if match is not None else 0

        # Inclusive upper bound so the last page is fetched too.
        # NOTE(review): assumes the number found above is the page count.
        for i in range(1, total_page + 1):
            if i == 1:
                url = url_origin + '/index.html'
            else:
                url = url_origin + '/index_' + str(i) + '.html'
            request = urllib.request.Request(url)
            try:
                with urllib.request.urlopen(request) as res:
                    soup = BeautifulSoup(res, 'lxml')
                title_obj = soup.find(attrs={'class': 'picmainer'})
                if title_obj is not None:
                    print(url)
                    title = title_obj.h1.string
                    content = soup.find('img')
                    src = content.get('src')
                    file_name = validatetitle(title) + '.jpg'
                    urllib.request.urlretrieve(src, 'D://img2/' + file_name)
                    print(file_name + '保存成功')
            except Exception:
                # Best effort: report the gallery id and try the next page.
                print('异常' + str(j))
    except Exception:
        # Best effort: report the gallery id and try the next gallery.
        print('异常' + str(j))