在阅读了相关代码后,观察龙族三漫画图片地址的规律,简单写了一个专门下载龙族三漫画的小爬虫。
# coding=utf-8
#
# Small crawler that downloads the "Longzu III" comic page images.
#
# Image URLs follow a fixed pattern (two-digit chapter number, then a page
# number counted from 1), so every page address can be generated by string
# formatting.  A chapter is considered finished as soon as the server answers
# a page request with an HTTP error.
import os

try:  # Python 2
    import urllib2
    from urllib2 import HTTPError
except ImportError:  # Python 3: urllib.request offers the same urlopen()
    import urllib.request as urllib2
    from urllib.error import HTTPError

# Directory that receives one sub-folder per chapter.  Built with
# os.path.join instead of the literal ".\longzu", which depended on the
# invalid escape sequence "\l" and on the Windows path separator.
save_folder = os.path.join(".", "longzu")

# Comic image URL template: {0} is the two-digit chapter number and {1} the
# page number; new image links are obtained by formatting this string.
url = 'http://mhpic.zymk.cn/comic/L%2F%E9%BE%99%E6%97%8F%E2%85%A2%2F{0}%E8%AF%9D%2F{1}.jpg-mht.middle'

# Chapters are downloaded from first_chapter through last_chapter inclusive.
first_chapter = 1
last_chapter = 46


def _fetch(address):
    """Return the raw bytes served at *address* and close the connection."""
    response = urllib2.urlopen(address)
    try:
        return response.read()
    finally:
        response.close()


def _write_bytes(path, data):
    """Write *data* to *path* in binary mode, replacing any existing file."""
    with open(path, 'wb') as fp:
        fp.write(data)


def _ensure_dir(path):
    """Create the directory *path* unless it already exists."""
    if not os.path.exists(path):
        os.mkdir(path)


def download(url, save_path):
    """Simple downloader: store the content of *url* in the file *save_path*.

    The content is fetched before the target file is opened, so a failed
    request no longer leaves an empty file behind.  Errors are reported with
    ``print`` instead of being raised (best effort).

    Returns True when the file was written and False otherwise.
    """
    try:
        _write_bytes(save_path, _fetch(url))
    except Exception as et:
        print(et)
        return False
    return True


def build_image_url(chapter, index):
    """Return the image URL for page *index* of chapter *chapter*."""
    return url.format('%.2d' % chapter, index)


def download_chapter(chapter):
    """Download every page of *chapter* into its own folder.

    Pages are requested in order starting at 1 until a request fails with an
    HTTP error, which is taken to mean that the last page has been passed.
    Any other network error propagates to the caller.

    Returns the number of pages fetched.
    """
    # Create the chapter directory.
    folder = os.path.join(save_folder, u"第 %d 话" % chapter)
    _ensure_dir(folder)

    index = 1
    while True:
        image_url = build_image_url(chapter, index)
        save_image_name = os.path.join(folder, "%.2d" % index + "." + "jpg")
        # Fetch each page exactly once; the same response both detects the
        # end of the chapter and supplies the bytes that get saved.
        try:
            data = _fetch(image_url)
        except HTTPError:
            break
        print("downloading:%s from url:%s" % (save_image_name, image_url))
        try:
            _write_bytes(save_image_name, data)
        except (IOError, OSError) as err:
            # Keep going on a failed write: report it and try the next page.
            print(err)
        index += 1
    return index - 1


def main():
    """Create the storage directory and download all chapters in order."""
    _ensure_dir(save_folder)
    for chapter in range(first_chapter, last_chapter + 1):
        download_chapter(chapter)


if __name__ == "__main__":
    main()