修正mac系统 html转pdf问题

2022-05-25 16:28:08 +08:00
parent 3e5e027025
commit d38ffdab1d
1 changed files with 38 additions and 16 deletions
--- a/start.py
+++ b/start.py
@@ -1,7 +1,9 @@
 # coding=UTF-8
+from importlib.resources import path
 import os, sys
 import requests
 import json
+import pdfkit
 import subprocess
 from bs4 import BeautifulSoup
 from datetime import datetime, timedelta
@@ -213,7 +215,7 @@ def DownHtmlMain(jsonDir, saveHtmlDir):
        print("\r", end="")
        SaveFile(arthtmlsavepath, arthtmlstr)

-        sleep(5)  # 防止下载过快被微信屏蔽，间隔3秒下载一篇
+        sleep(0)  # NOTE: delay is now 0, so the pause that guarded against WeChat blocking rapid downloads is effectively disabled


 # 把一个文件夹下的html文件都转为pdf
@@ -251,21 +253,41 @@ def PDFDir(htmldir, pdfdir):

 # 把一个Html文件转为pdf
 def PDFOne(htmlpath, pdfpath, skipExists=True, removehtml=True):
-    if skipExists and os.path.exists(pdfpath):
-        print("pdf exists", pdfpath)
-        if removehtml:
-            os.remove(htmlpath)
-        return
-    exepath = "wkhtmltopdf.exe"  # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下
-    cmdlist = []
-    cmdlist.append(" --load-error-handling ignore ")
-    cmdlist.append(" --page-height 200 ")  # 数字可以自己调节，也可以不加这两行
-    cmdlist.append(" --page-width 140 ")
-    cmdlist.append(" " + htmlpath + " ")
-    cmdlist.append(" " + pdfpath + " ")
-    cmdstr = exepath + "".join(cmdlist)
-    print(cmdstr)
-    result = subprocess.check_call(cmdstr, shell=False)
+    print(htmlpath, pdfpath)
+    options = {
+        'page-size': 'Letter',
+        'margin-top': '0.75in',
+        'margin-right': '0.75in',
+        'margin-bottom': '0.75in',
+        'margin-left': '0.75in',
+        'encoding': "UTF-8",
+        'custom-header': [
+            ('Accept-Encoding', 'gzip')
+        ],
+        # 'cookie': [
+        #     ('cookie-empty-value', '""')
+        #     ('cookie-name1', 'cookie-value1'),
+        #     ('cookie-name2', 'cookie-value2'),
+        # ],
+        'no-outline': None,
+        'enable-local-file-access': None
+    }
+    pdfkit.from_file(htmlpath, pdfpath, options=options, verbose=True)
+    # if skipExists and os.path.exists(pdfpath):
+    #     print("pdf exists", pdfpath)
+    #     if removehtml:
+    #         os.remove(htmlpath)
+    #     return
+    # exepath = "wkhtmltopdf.exe"  # 把wkhtmltopdf.exe文件保存到与本py文件相同的目录下
+    # cmdlist = []
+    # cmdlist.append(" --load-error-handling ignore ")
+    # cmdlist.append(" --page-height 200 ")  # 数字可以自己调节，也可以不加这两行
+    # cmdlist.append(" --page-width 140 ")
+    # cmdlist.append(" " + htmlpath + " ")
+    # cmdlist.append(" " + pdfpath + " ")
+    # cmdstr = exepath + "".join(cmdlist)
+    # print(cmdstr)
+    # result = subprocess.check_call(cmdstr, shell=False)
    # stdout,stderr = result.communicate()
    # result.wait() #等待转换完一个再转下一个
    if removehtml: