diff --git a/start.py b/start.py index 5061f2c..174e3ff 100644 --- a/start.py +++ b/start.py @@ -1,7 +1,9 @@ # coding=UTF-8 +from importlib.resources import path import os, sys import requests import json +import pdfkit import subprocess from bs4 import BeautifulSoup from datetime import datetime, timedelta @@ -213,7 +215,7 @@ def DownHtmlMain(jsonDir, saveHtmlDir): print("\r", end="") SaveFile(arthtmlsavepath, arthtmlstr) - sleep(5) # 防止下载过快被微信屏蔽,间隔3秒下载一篇 + sleep(0) # 防止下载过快被微信屏蔽,间隔3秒下载一篇 # 把一个文件夹下的html文件都转为pdf @@ -251,21 +253,41 @@ def PDFDir(htmldir, pdfdir): # 把一个Html文件转为pdf def PDFOne(htmlpath, pdfpath, skipExists=True, removehtml=True): - if skipExists and os.path.exists(pdfpath): - print("pdf exists", pdfpath) - if removehtml: - os.remove(htmlpath) - return - exepath = "wkhtmltopdf.exe" # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下 - cmdlist = [] - cmdlist.append(" --load-error-handling ignore ") - cmdlist.append(" --page-height 200 ") # 数字可以自己调节,也可以不加这两行 - cmdlist.append(" --page-width 140 ") - cmdlist.append(" " + htmlpath + " ") - cmdlist.append(" " + pdfpath + " ") - cmdstr = exepath + "".join(cmdlist) - print(cmdstr) - result = subprocess.check_call(cmdstr, shell=False) + print(htmlpath, pdfpath) + options = { + 'page-size': 'Letter', + 'margin-top': '0.75in', + 'margin-right': '0.75in', + 'margin-bottom': '0.75in', + 'margin-left': '0.75in', + 'encoding': "UTF-8", + 'custom-header': [ + ('Accept-Encoding', 'gzip') + ], + # 'cookie': [ + # ('cookie-empty-value', '""') + # ('cookie-name1', 'cookie-value1'), + # ('cookie-name2', 'cookie-value2'), + # ], + 'no-outline': None, + 'enable-local-file-access': None + } + pdfkit.from_file(htmlpath, pdfpath, options=options, verbose=True) + # if skipExists and os.path.exists(pdfpath): + # print("pdf exists", pdfpath) + # if removehtml: + # os.remove(htmlpath) + # return + # exepath = "wkhtmltopdf.exe" # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下 + # cmdlist = [] + # cmdlist.append(" --load-error-handling ignore ") + # cmdlist.append(" --page-height 200 ") # 数字可以自己调节,也可以不加这两行 + # cmdlist.append(" --page-width 140 ") + # cmdlist.append(" " + htmlpath + " ") + # cmdlist.append(" " + pdfpath + " ") + # cmdstr = exepath + "".join(cmdlist) + # print(cmdstr) + # result = subprocess.check_call(cmdstr, shell=False) # stdout,stderr = result.communicate() # result.wait() #等待转换完一个再转下一个 if removehtml: