From 7e4e77d9583b883d3df53c322b3d8e93c58660db Mon Sep 17 00:00:00 2001 From: LeLe86 <251192913@qq.com> Date: Fri, 3 Jan 2020 20:41:12 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=BE=AE=E4=BF=A1=E6=94=B9?= =?UTF-8?q?=E7=89=88=E5=90=8E=E6=AD=A3=E6=96=87=E6=97=A0=E6=B3=95=E6=AD=A3?= =?UTF-8?q?=E5=B8=B8=E6=98=BE=E7=A4=BA=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.json | 2 +- start.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/config.json b/config.json index 8aabe69..4fd562c 100644 --- a/config.json +++ b/config.json @@ -1,5 +1,5 @@ { - "jsonDir": "C:/vWeChatFiles/rawlist/tmp1", + "jsonDir": "C:/Users/kklwin10/Desktop/Dump-0103-20-14-29", "htmlDir": "c:/vWeChatFiles/html/", "pdfDir": "c:/vWeChatFiles/pdf/" } \ No newline at end of file diff --git a/start.py b/start.py index 1286fa7..8f34311 100644 --- a/start.py +++ b/start.py @@ -96,6 +96,7 @@ def ChangeImgSrc(htmltxt,saveimgdir,htmlname): else : img.attrs["src"] = "" ChangeCssSrc(bs) #修改link标签 + ChangeContent(bs) #修改js_content的style,使正文能正常显示 return str(bs) #将BeautifulSoup对象再转换为字符串,用于保存 def ChangeCssSrc(bs): @@ -105,7 +106,11 @@ def ChangeCssSrc(bs): if href.startswith("//"): newhref = "http:" + href link.attrs["href"] = newhref - + +def ChangeContent(bs): + jscontent = bs.find(id="js_content") + jscontent.attrs["style"]="" + #文章类 class Article(): def __init__(self,url,pubdate,idx,title):