修正mac系统 html转pdf问题

2022-05-25 16:28:08 +08:00
parent 3e5e027025
commit d38ffdab1d
1 changed files with 38 additions and 16 deletions
--- a/start.py
+++ b/start.py
@@ -1,7 +1,9 @@
 # coding=UTF-8
 from importlib.resources import path
 import os, sys
 import requests
 import json
 import pdfkit
 import subprocess
 from bs4 import BeautifulSoup
 from datetime import datetime, timedelta
@@ -213,7 +215,7 @@ def DownHtmlMain(jsonDir, saveHtmlDir):
        print("\r", end="")
        SaveFile(arthtmlsavepath, arthtmlstr)
-        sleep(5)  # 防止下载过快被微信屏蔽，间隔3秒下载一篇
+        sleep(0)  # throttle disabled (0 s); the previous 5 s pause between articles was there to avoid WeChat blocking rapid downloads
 # 把一个文件夹下的html文件都转为pdf
@@ -251,21 +253,41 @@ def PDFDir(htmldir, pdfdir):
 # 把一个Html文件转为pdf
 def PDFOne(htmlpath, pdfpath, skipExists=True, removehtml=True):
-    if skipExists and os.path.exists(pdfpath):
+    print(htmlpath, pdfpath)
-        print("pdf exists", pdfpath)
+    options = {
-        if removehtml:
+        'page-size': 'Letter',
-            os.remove(htmlpath)
+        'margin-top': '0.75in',
-        return
+        'margin-right': '0.75in',
-    exepath = "wkhtmltopdf.exe"  # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下
+        'margin-bottom': '0.75in',
-    cmdlist = []
+        'margin-left': '0.75in',
-    cmdlist.append(" --load-error-handling ignore ")
+        'encoding': "UTF-8",
-    cmdlist.append(" --page-height 200 ")  # 数字可以自己调节，也可以不加这两行
+        'custom-header': [
-    cmdlist.append(" --page-width 140 ")
+            ('Accept-Encoding', 'gzip')
-    cmdlist.append(" " + htmlpath + " ")
+        ],
-    cmdlist.append(" " + pdfpath + " ")
+        # 'cookie': [
-    cmdstr = exepath + "".join(cmdlist)
+        #     ('cookie-empty-value', '""')
-    print(cmdstr)
+        #     ('cookie-name1', 'cookie-value1'),
-    result = subprocess.check_call(cmdstr, shell=False)
+        #     ('cookie-name2', 'cookie-value2'),
        # ],
        'no-outline': None,
        'enable-local-file-access': None
    }
    pdfkit.from_file(htmlpath, pdfpath, options=options, verbose=True)
    # if skipExists and os.path.exists(pdfpath):
    #     print("pdf exists", pdfpath)
    #     if removehtml:
    #         os.remove(htmlpath)
    #     return
    # exepath = "wkhtmltopdf.exe"  # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下
    # cmdlist = []
    # cmdlist.append(" --load-error-handling ignore ")
    # cmdlist.append(" --page-height 200 ")  # 数字可以自己调节，也可以不加这两行
    # cmdlist.append(" --page-width 140 ")
    # cmdlist.append(" " + htmlpath + " ")
    # cmdlist.append(" " + pdfpath + " ")
    # cmdstr = exepath + "".join(cmdlist)
    # print(cmdstr)
    # result = subprocess.check_call(cmdstr, shell=False)
    # stdout,stderr = result.communicate()
    # result.wait() #等待转换完一个再转下一个
    if removehtml: