def save_img(root, img_url, timeout=30):
    """Download ``img_url`` into the directory ``root`` unless it already exists.

    The local file name is the last ``/``-separated component of the URL.

    Args:
        root: Directory the image is stored in (with or without a trailing
            path separator).
        img_url: URL of the image to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        0 if the file was already present or was downloaded successfully,
        1 if the download or the write to disk failed.
    """
    path = osp.join(root, img_url.split('/')[-1])
    if osp.exists(path):
        print(path+"文件已存在!")
        return 0

    try:
        # Closing the session on exit releases its pooled connections.
        with requests.session() as session:
            response = session.get(img_url, timeout=timeout)
            # Without this an HTTP error page would be stored as the image.
            response.raise_for_status()
        with open(path, 'wb') as f:
            f.write(response.content)
    except Exception as err:
        # Best effort: report the failure and let the caller carry on.
        print(img_url+", 爬取失败!", repr(err))
        return 1

    print(img_url+"已下载")
    return 0
def findimg(line, ori_pre):
    """Find the first image URL in ``line`` that starts with ``ori_pre``.

    A URL is the shortest run of characters that begins with ``ori_pre`` and
    ends with one of the recognised image extensions.

    Args:
        line: Text to search.
        ori_pre: Literal URL prefix the image address must start with.

    Returns:
        ``(img, name)`` where ``img`` is the full URL and ``name`` is its last
        ``/``-separated component, or ``(None, None)`` when nothing matches.
    """
    extensions = (
        r"\.jfif|\.svg|\.webp|\.gif|\.jpeg|\.jpg|\.png|\.PNG|\.JPEG|\.JPG"
    )
    # The prefix is literal text, so escape it: otherwise "." matches any
    # character and "(", "?", "+" etc. change the meaning of the pattern.
    pattern = f"{re.escape(ori_pre)}.*?(?:{extensions})"
    match = re.search(pattern, line)
    if match is None:
        print("已自动忽略:", line)
        return None, None
    img = match.group(0)
    return img, img.split('/')[-1]
def changeurl(ori_root, save_root, down_root, ori_pre, new_pre):
    """Rewrite image links in text files and download the referenced images.

    Each line containing ``ori_pre`` is searched with ``findimg``. The image
    found is downloaded with ``save_img`` into ``down_root``, and if that
    returns 0 the URL in the line is replaced by ``new_pre`` plus the image
    file name. Every line, rewritten or not, is written to a file of the same
    name under ``save_root``.

    Args:
        ori_root: A single source file, or a directory whose regular files
            are all processed.
        save_root: Directory the rewritten files are written to.
        down_root: Directory passed to ``save_img`` for the downloads.
        ori_pre: URL prefix to look for; must end with ``/``.
        new_pre: URL prefix to substitute; must end with ``/``.

    Raises:
        ValueError: If ``ori_pre`` or ``new_pre`` does not end with ``/``.
    """
    # An explicit check survives ``python -O`` and copes with empty strings.
    if not (ori_pre.endswith('/') and new_pre.endswith('/')):
        raise ValueError("ori_pre and new_pre must both end with '/'")

    if osp.isdir(ori_root):
        # Sub-directories cannot be opened as text files, so skip them.
        files = [
            entry for entry in os.listdir(ori_root)
            if osp.isfile(osp.join(ori_root, entry))
        ]
    else:
        # Split by path component: splitting on the file name as a substring
        # goes wrong when that name also occurs earlier in the path.
        file = osp.basename(ori_root)
        ori_root = osp.dirname(ori_root)
        files = [file]

    # makedir is defined elsewhere; presumably it creates the directory if missing.
    makedir(save_root)
    makedir(down_root)

    for file in tqdm(files):
        print("Starting... ", file)
        with open(osp.join(ori_root, file), 'r', encoding='utf-8') as f:
            content = f.readlines()
        with open(osp.join(save_root, file), 'w', encoding='utf-8') as f:
            for line in content:
                if ori_pre in line:
                    img, name = findimg(line, ori_pre)
                    if img is not None:
                        rewritten = line.replace(img, new_pre + name)
                        print(line, "==>", rewritten)
                        # Keep the original link when the download failed.
                        if save_img(down_root, img) == 0:
                            line = rewritten
                f.write(line)