修正 macOS 系统下 HTML 转 PDF 的问题
This commit is contained in:
parent
3e5e027025
commit
d38ffdab1d
54
start.py
54
start.py
@ -1,7 +1,9 @@
|
|||||||
# coding=UTF-8
|
# coding=UTF-8
|
||||||
|
from importlib.resources import path
|
||||||
import os, sys
|
import os, sys
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
|
import pdfkit
|
||||||
import subprocess
|
import subprocess
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
@ -213,7 +215,7 @@ def DownHtmlMain(jsonDir, saveHtmlDir):
|
|||||||
print("\r", end="")
|
print("\r", end="")
|
||||||
SaveFile(arthtmlsavepath, arthtmlstr)
|
SaveFile(arthtmlsavepath, arthtmlstr)
|
||||||
|
|
||||||
sleep(5) # 防止下载过快被微信屏蔽,间隔3秒下载一篇
|
sleep(0) # 防止下载过快被微信屏蔽,间隔3秒下载一篇
|
||||||
|
|
||||||
|
|
||||||
# 把一个文件夹下的html文件都转为pdf
|
# 把一个文件夹下的html文件都转为pdf
|
||||||
@ -251,21 +253,41 @@ def PDFDir(htmldir, pdfdir):
|
|||||||
|
|
||||||
# 把一个Html文件转为pdf
|
# 把一个Html文件转为pdf
|
||||||
def PDFOne(htmlpath, pdfpath, skipExists=True, removehtml=True):
|
def PDFOne(htmlpath, pdfpath, skipExists=True, removehtml=True):
|
||||||
if skipExists and os.path.exists(pdfpath):
|
print(htmlpath, pdfpath)
|
||||||
print("pdf exists", pdfpath)
|
options = {
|
||||||
if removehtml:
|
'page-size': 'Letter',
|
||||||
os.remove(htmlpath)
|
'margin-top': '0.75in',
|
||||||
return
|
'margin-right': '0.75in',
|
||||||
exepath = "wkhtmltopdf.exe" # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下
|
'margin-bottom': '0.75in',
|
||||||
cmdlist = []
|
'margin-left': '0.75in',
|
||||||
cmdlist.append(" --load-error-handling ignore ")
|
'encoding': "UTF-8",
|
||||||
cmdlist.append(" --page-height 200 ") # 数字可以自己调节,也可以不加这两行
|
'custom-header': [
|
||||||
cmdlist.append(" --page-width 140 ")
|
('Accept-Encoding', 'gzip')
|
||||||
cmdlist.append(" " + htmlpath + " ")
|
],
|
||||||
cmdlist.append(" " + pdfpath + " ")
|
# 'cookie': [
|
||||||
cmdstr = exepath + "".join(cmdlist)
|
# ('cookie-empty-value', '""')
|
||||||
print(cmdstr)
|
# ('cookie-name1', 'cookie-value1'),
|
||||||
result = subprocess.check_call(cmdstr, shell=False)
|
# ('cookie-name2', 'cookie-value2'),
|
||||||
|
# ],
|
||||||
|
'no-outline': None,
|
||||||
|
'enable-local-file-access': None
|
||||||
|
}
|
||||||
|
pdfkit.from_file(htmlpath, pdfpath, options=options, verbose=True)
|
||||||
|
# if skipExists and os.path.exists(pdfpath):
|
||||||
|
# print("pdf exists", pdfpath)
|
||||||
|
# if removehtml:
|
||||||
|
# os.remove(htmlpath)
|
||||||
|
# return
|
||||||
|
# exepath = "wkhtmltopdf.exe" # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下
|
||||||
|
# cmdlist = []
|
||||||
|
# cmdlist.append(" --load-error-handling ignore ")
|
||||||
|
# cmdlist.append(" --page-height 200 ") # 数字可以自己调节,也可以不加这两行
|
||||||
|
# cmdlist.append(" --page-width 140 ")
|
||||||
|
# cmdlist.append(" " + htmlpath + " ")
|
||||||
|
# cmdlist.append(" " + pdfpath + " ")
|
||||||
|
# cmdstr = exepath + "".join(cmdlist)
|
||||||
|
# print(cmdstr)
|
||||||
|
# result = subprocess.check_call(cmdstr, shell=False)
|
||||||
# stdout,stderr = result.communicate()
|
# stdout,stderr = result.communicate()
|
||||||
# result.wait() #等待转换完一个再转下一个
|
# result.wait() #等待转换完一个再转下一个
|
||||||
if removehtml:
|
if removehtml:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user