第一步去哪里设置，求回答 #19

LiNengxin · 2025-02-11T10:53:00Z

我想请问第一步的脚本配置怎么实现
具体操作步骤是什么
from flask import Flask, request, jsonify
import os
import base64
import subprocess
from flask import Flask, send_file, abort
from pypdf import PdfWriter, PdfReader
from pypdf.generic import RectangleObject
import sys

####################################### Configuration #######################################
pdf2zh = "pdf2zh" # pdf2zh command to invoke: default is 'pdf2zh'
thread_num = 4 # number of threads passed to pdf2zh: default is 4
port_num = 7860 # port the server listens on: default is 8888 (set to 7860 here)
service = 'bing' # translation service: default is bing
translated_dir = "./translated/" # output directory for translated files (temporary; may be deleted after translation)
config_path = './config.json' # path to the configuration file
######################################################################################

def get_absolute_path(path):
    """Return ``path`` as an absolute path.

    An already-absolute path is returned unchanged (it is not normalised);
    a relative path is resolved with ``os.path.abspath``.

    Args:
        path: File-system path, absolute or relative.

    Returns:
        The absolute form of ``path``.
    """
    if os.path.isabs(path):
        return path
    return os.path.abspath(path)

def get_file_from_request(request):
    """Resolve the PDF referenced by a JSON request to a local file path.

    The JSON body (read with ``request.get_json()``) supplies ``filePath`` and,
    optionally, ``fileContent``: the Base64-encoded PDF, with or without a
    ``data:application/pdf;base64,`` prefix.

    If ``filePath`` exists on this machine it is used directly. Otherwise the
    result is ``translated_dir`` joined with the base name of ``filePath``, and
    ``fileContent`` (when present) is decoded and written to that location.

    Args:
        request: Object exposing ``get_json()`` (the Flask request).

    Returns:
        Path of the PDF to process, with backslashes replaced by forward
        slashes. When the file is missing locally and no ``fileContent`` was
        sent, the returned path is not created by this function.
    """
    data = request.get_json()
    path = data.get('filePath')
    # Convert backslashes to forward slashes (Windows -> Linux/macOS) so that
    # os.path.basename() splits the name correctly on every platform.
    path = path.replace('\\', '/')
    if os.path.exists(path):
        return path

    input_path = os.path.join(translated_dir, os.path.basename(path))
    file_content = data.get('fileContent')
    if file_content:
        prefix = 'data:application/pdf;base64,'
        if file_content.startswith(prefix):  # strip the data-URL prefix, if any
            file_content = file_content[len(prefix):]
        file_data = base64.b64decode(file_content)
        # The upload may arrive before anything has created the output directory.
        os.makedirs(translated_dir, exist_ok=True)
        with open(input_path, 'wb') as f:
            f.write(file_data)
    return input_path

# Flask application object; __name__ tells Flask which module it belongs to.
app = Flask(__name__)
@app.route('/translate', methods=['POST'])
def translate():
    """Translate the PDF named in the request by running the pdf2zh command.

    The request body is parsed by ``get_file_from_request``. pdf2zh is run with
    the configured thread count, output directory and service; the handler then
    checks that ``<stem>-mono.pdf`` and ``<stem>-dual.pdf`` exist in the output
    directory.

    Returns:
        ``({'status': 'success', 'translatedPath1': ..., 'translatedPath2': ...}, 200)``
        on success, or ``({'status': 'error', 'message': ...}, 500)`` on any
        failure, including a malformed request.
    """
    print("### translate ###")
    try:
        # Create the output directory first: request parsing may need to write
        # an uploaded PDF into it.
        os.makedirs(translated_dir, exist_ok=True)
        input_path = get_file_from_request(request)
        print("### translating ###: ", input_path)

        # Run pdf2zh; users may customise the command here.
        command = [
            pdf2zh,
            input_path,
            '--t', str(thread_num),
            '--output', translated_dir,
            '--service', service
        ]
        # The exit status is not checked; success is judged by the output files.
        subprocess.run(command, check=False)
        abs_translated_dir = get_absolute_path(translated_dir)
        print("abs_translated_dir: ", abs_translated_dir)
        # Derive output names from the stem, so only the final extension is
        # replaced and upper-case ".PDF" is handled too.
        # NOTE(review): assumes pdf2zh names its outputs "<stem>-mono.pdf" and
        # "<stem>-dual.pdf" -- confirm against the installed pdf2zh version.
        stem = os.path.splitext(os.path.basename(input_path))[0]
        translated_path1 = os.path.join(abs_translated_dir, stem + '-mono.pdf')
        translated_path2 = os.path.join(abs_translated_dir, stem + '-dual.pdf')
        if not os.path.exists(translated_path1) or not os.path.exists(translated_path2):
            raise Exception("pdf2zh failed to generate translated files")
        return jsonify({'status': 'success', 'translatedPath1': translated_path1, 'translatedPath2': translated_path2}), 200
    except Exception as e:
        print(f"Error: {e}")
        return jsonify({'status': 'error', 'message': str(e)}), 500

@app.route('/translatedFile/<filename>')
def download(filename):
    """Send a file from the translated-output directory as an attachment.

    Args:
        filename: Name of the file inside ``translated_dir``, taken from the URL.

    Returns:
        The file as a download, or ``("File not found", 404)`` when the name is
        not a plain file name or no such file exists.
    """
    # Only plain file names are served; anything containing a directory part
    # is rejected so the lookup cannot leave the output directory.
    if os.path.basename(filename) != filename:
        return "File not found", 404
    abs_directory = get_absolute_path(translated_dir)
    file_path = os.path.join(abs_directory, filename)
    if not os.path.isfile(file_path):
        return "File not found", 404
    return send_file(file_path, as_attachment=True, download_name=filename)

# 新增了一个cut pdf函数，用于切割双栏pdf文件

def split_and_merge_pdf(input_pdf, output_pdf):
    """Cut each page of ``input_pdf`` into left and right halves and save them.

    Each page's media box is narrowed to its left half and to its right half,
    measured from the origin (0, 0).

    * When the file name contains ``'dual'``, pages are handled in pairs and
      written as: left of page i, left of page i+1, right of page i, right of
      page i+1. A trailing unpaired page is written as left, right.
    * Otherwise every page is written as left half followed by right half.

    Args:
        input_pdf: Path of the PDF to cut.
        output_pdf: Path the resulting PDF is written to.
    """
    writer = PdfWriter()
    # Assigning a mediabox mutates the page object, so the left and right
    # halves of a page are taken from two independent readers of the same file.
    left_reader = PdfReader(input_pdf)
    right_reader = PdfReader(input_pdf)
    page_count = len(left_reader.pages)

    def cut_page(index):
        """Return the (left half, right half) page objects for page ``index``."""
        left_half = left_reader.pages[index]
        right_half = right_reader.pages[index]
        # Read the dimensions before either media box is overwritten.
        media_box = left_half.mediabox
        width = media_box.width
        height = media_box.height
        left_half.mediabox = RectangleObject((0, 0, width / 2, height))
        right_half.mediabox = RectangleObject((width / 2, 0, width, height))
        return left_half, right_half

    # Look at the file name only, so a directory called e.g. "dualboot" does
    # not switch an ordinary file into pair mode.
    if 'dual' in os.path.basename(input_pdf):
        for i in range(0, page_count, 2):
            left_1, right_1 = cut_page(i)
            if i + 1 < page_count:
                left_2, right_2 = cut_page(i + 1)
                ordered = (left_1, left_2, right_1, right_2)
            else:
                # Odd page count: the last page has no partner.
                ordered = (left_1, right_1)
            for page in ordered:
                writer.add_page(page)
    else:
        for i in range(page_count):
            for page in cut_page(i):
                writer.add_page(page)

    with open(output_pdf, "wb") as output_file:
        writer.write(output_file)

# 新增了一个cut接口，用于切割双栏pdf文件

@app.route('/cut', methods=['POST'])
def cut():
    """Cut the PDF named in the request into half pages.

    The request body is parsed by ``get_file_from_request``. The result is
    written by ``split_and_merge_pdf`` to ``<stem>-cut.pdf`` inside the
    translated-output directory.

    Returns:
        ``({'status': 'success'}, 200)`` on success, or
        ``({'status': 'error', 'message': ...}, 500)`` on any failure,
        including a malformed request.
    """
    print("### cut ###")
    try:
        # Create the output directory first: request parsing may need to write
        # an uploaded PDF into it.
        os.makedirs(translated_dir, exist_ok=True)
        input_path = get_file_from_request(request)
        print("### cutting ###: ", input_path)
        abs_translated_dir = get_absolute_path(translated_dir)
        # Build the name from the stem so the output can never coincide with
        # the input, even when the input lacks a lower-case ".pdf" suffix.
        stem = os.path.splitext(os.path.basename(input_path))[0]
        translated_path = os.path.join(abs_translated_dir, stem + '-cut.pdf')
        split_and_merge_pdf(input_path, translated_path)
        if not os.path.exists(translated_path):
            raise Exception("failed to generate cutted files")
        return jsonify({'status': 'success'}), 200
    except Exception as e:
        print(f"Error: {e}")
        return jsonify({'status': 'error', 'message': str(e)}), 500

# Script entry point: optional command-line arguments override the configured
# translation service and thread count before the server starts.
if __name__ == '__main__':
    if len(sys.argv) > 1:  # command-line argument 1: service
        service = sys.argv[1]
    if len(sys.argv) > 2:  # command-line argument 2: thread_num
        thread_num = int(sys.argv[2])
    # Listen on all interfaces on the configured port.
    app.run(host='0.0.0.0', port=port_num)

The text was updated successfully, but these errors were encountered:

guaguastandup · 2025-02-11T16:12:40Z

看你贴的代码，你已经修改配置了呀？然后命令行运行这段python代码，挂在后端就好了。

LiNengxin · 2025-02-12T06:36:10Z

第一个问题：运行脚本以后（修改部分为8888改为7860），
pycharm运行
`E:\Users\LNX\anaconda3\envs\pdf2zh\python.exe E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\111.py

Serving Flask app '111'
Debug mode: off
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
Running on all addresses (0.0.0.0)
Running on http://127.0.0.1:7860
Running on http://192.168.1.102:7860
Press CTRL+C to quit`

但是，浏览器出现 Not Found：
`The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.`

第二个问题：在zotero中运行，出现

同时pycharm的运行脚本出现这种错误
`E:\Users\LNX\anaconda3\envs\pdf2zh\python.exe E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\111.py

Serving Flask app '111'
Debug mode: off
WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
Running on all addresses (0.0.0.0)
Running on http://127.0.0.1:7860
Running on http://192.168.1.102:7860
Press CTRL+C to quit

translate

translating ###: G:/OneDrive/Zotero/Storage/IEE IEEE_VTS_Challenge/2023/Pre_2018_Model Based Energy Management and State Estimation for the Robotic Electric Vehicle ROboMObil.pdf

Traceback (most recent call last):
File "E:\Users\LNX\anaconda3\envs\pdf2zh\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "E:\Users\LNX\anaconda3\envs\pdf2zh\lib\runpy.py", line 86, in run_code
exec(code, run_globals)
File "E:\Users\LNX\anaconda3\envs\pdf2zh\Scripts\pdf2zh.exe_main.py", line 4, in
File "E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\pdf2zh.py", line 14, in
from pdf2zh import version, log
ImportError: cannot import name 'version' from partially initialized module 'pdf2zh' (most likely due to a circular import) (E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\pdf2zh.py)
127.0.0.1 - - [12/Feb/2025 14:25:51] "POST /translate HTTP/1.1" 500 -
abs_translated_dir: E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\translated
Error: pdf2zh failed to generate translated files
127.0.0.1 - - [12/Feb/2025 14:28:37] "GET / HTTP/1.1" 404 -
192.168.1.102 - - [12/Feb/2025 14:30:28] "GET / HTTP/1.1" 404 -

translate

translating ###: G:/OneDrive/Zotero/Storage/IEE IEEE_VTS_Challenge/2023/Pre_2018_Model Based Energy Management and State Estimation for the Robotic Electric Vehicle ROboMObil.pdf

Traceback (most recent call last):
File "E:\Users\LNX\anaconda3\envs\pdf2zh\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "E:\Users\LNX\anaconda3\envs\pdf2zh\lib\runpy.py", line 86, in run_code
exec(code, run_globals)
File "E:\Users\LNX\anaconda3\envs\pdf2zh\Scripts\pdf2zh.exe_main.py", line 4, in
File "E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\pdf2zh.py", line 14, in
from pdf2zh import version, log
ImportError: cannot import name 'version' from partially initialized module 'pdf2zh' (most likely due to a circular import) (E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\pdf2zh.py)
127.0.0.1 - - [12/Feb/2025 14:32:04] "POST /translate HTTP/1.1" 500 -
abs_translated_dir: E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\translated
Error: pdf2zh failed to generate translated files
`

guaguastandup · 2025-02-12T06:45:36Z

这个不需要在浏览器打开，挂在后台就好了。
另外，你需要先确保一下你本地安装的pdf2zh是可用的。目前的报错是因为pdf2zh翻译失败了。
ImportError: cannot import name 'version' from partially initialized module 'pdf2zh' (most likely due to a circular import)

LiNengxin · 2025-02-12T06:51:58Z

我将pdf2zh安装在通过conda创建的虚拟环境里面，如果通过激活conda环境后，使用pdf2zh -i 可以在浏览器正常使用，也能正常翻译。
那现在的问题是不是出在，我将pdf2zh安装在了虚拟环境里面

guaguastandup · 2025-02-12T07:04:00Z

你可以试试打开命令行工具(cmd)，先在命令行激活虚拟环境，然后在命令行工具里执行这个脚本试试。

或者尝试另一个方法：
把配置里的pdf2zh = 'pdf2zh'改为pdf2zh = 'conda activate xxx && pdf2zh'试试，xxx是你的虚拟环境名

guaguastandup mentioned this issue Feb 18, 2025

conda配置的pdf2zh服务，如何修改server.py呀 #27

Open

guaguastandup added the question Further information is requested label Feb 21, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

第一步去哪里设置，求回答 #19

第一步去哪里设置，求回答 #19

LiNengxin commented Feb 11, 2025

guaguastandup commented Feb 11, 2025 •

edited

Loading

LiNengxin commented Feb 12, 2025

guaguastandup commented Feb 12, 2025

LiNengxin commented Feb 12, 2025

guaguastandup commented Feb 12, 2025

第一步去哪里设置，求回答 #19

第一步去哪里设置，求回答 #19

Comments

LiNengxin commented Feb 11, 2025

新增了一个cut pdf函数，用于切割双栏pdf文件

新增了一个cut接口，用于切割双栏pdf文件

guaguastandup commented Feb 11, 2025 • edited Loading

LiNengxin commented Feb 12, 2025

translate

translating ###: G:/OneDrive/Zotero/Storage/IEE IEEE_VTS_Challenge/2023/Pre_2018_Model Based Energy Management and State Estimation for the Robotic Electric Vehicle ROboMObil.pdf

translate

translating ###: G:/OneDrive/Zotero/Storage/IEE IEEE_VTS_Challenge/2023/Pre_2018_Model Based Energy Management and State Estimation for the Robotic Electric Vehicle ROboMObil.pdf

guaguastandup commented Feb 12, 2025

LiNengxin commented Feb 12, 2025

guaguastandup commented Feb 12, 2025

guaguastandup commented Feb 11, 2025 •

edited

Loading