修正 macOS 系统下 HTML 转 PDF 的问题

This commit is contained in:
rucky 2022-05-25 16:28:08 +08:00
parent 3e5e027025
commit d38ffdab1d

View File

@@ -1,7 +1,9 @@
# coding=UTF-8 # coding=UTF-8
from importlib.resources import path
import os, sys import os, sys
import requests import requests
import json import json
import pdfkit
import subprocess import subprocess
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from datetime import datetime, timedelta from datetime import datetime, timedelta
@@ -213,7 +215,7 @@ def DownHtmlMain(jsonDir, saveHtmlDir):
print("\r", end="") print("\r", end="")
SaveFile(arthtmlsavepath, arthtmlstr) SaveFile(arthtmlsavepath, arthtmlstr)
sleep(5) # 防止下载过快被微信屏蔽间隔3秒下载一篇 sleep(0) # 防止下载过快被微信屏蔽间隔3秒下载一篇
# 把一个文件夹下的html文件都转为pdf # 把一个文件夹下的html文件都转为pdf
@@ -251,21 +253,41 @@ def PDFDir(htmldir, pdfdir):
# 把一个Html文件转为pdf # 把一个Html文件转为pdf
def PDFOne(htmlpath, pdfpath, skipExists=True, removehtml=True): def PDFOne(htmlpath, pdfpath, skipExists=True, removehtml=True):
if skipExists and os.path.exists(pdfpath): print(htmlpath, pdfpath)
print("pdf exists", pdfpath) options = {
if removehtml: 'page-size': 'Letter',
os.remove(htmlpath) 'margin-top': '0.75in',
return 'margin-right': '0.75in',
exepath = "wkhtmltopdf.exe" # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下 'margin-bottom': '0.75in',
cmdlist = [] 'margin-left': '0.75in',
cmdlist.append(" --load-error-handling ignore ") 'encoding': "UTF-8",
cmdlist.append(" --page-height 200 ") # 数字可以自己调节,也可以不加这两行 'custom-header': [
cmdlist.append(" --page-width 140 ") ('Accept-Encoding', 'gzip')
cmdlist.append(" " + htmlpath + " ") ],
cmdlist.append(" " + pdfpath + " ") # 'cookie': [
cmdstr = exepath + "".join(cmdlist) # ('cookie-empty-value', '""')
print(cmdstr) # ('cookie-name1', 'cookie-value1'),
result = subprocess.check_call(cmdstr, shell=False) # ('cookie-name2', 'cookie-value2'),
# ],
'no-outline': None,
'enable-local-file-access': None
}
pdfkit.from_file(htmlpath, pdfpath, options=options, verbose=True)
# if skipExists and os.path.exists(pdfpath):
# print("pdf exists", pdfpath)
# if removehtml:
# os.remove(htmlpath)
# return
# exepath = "wkhtmltopdf.exe" # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下
# cmdlist = []
# cmdlist.append(" --load-error-handling ignore ")
# cmdlist.append(" --page-height 200 ") # 数字可以自己调节,也可以不加这两行
# cmdlist.append(" --page-width 140 ")
# cmdlist.append(" " + htmlpath + " ")
# cmdlist.append(" " + pdfpath + " ")
# cmdstr = exepath + "".join(cmdlist)
# print(cmdstr)
# result = subprocess.check_call(cmdstr, shell=False)
# stdout,stderr = result.communicate() # stdout,stderr = result.communicate()
# result.wait() #等待转换完一个再转下一个 # result.wait() #等待转换完一个再转下一个
if removehtml: if removehtml: