Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

第一步去哪里设置,求回答 #19

Open
LiNengxin opened this issue Feb 11, 2025 · 5 comments
Open

第一步去哪里设置,求回答 #19

LiNengxin opened this issue Feb 11, 2025 · 5 comments
Labels
question Further information is requested

Comments

@LiNengxin
Copy link

我想请问第一步的脚本配置怎么实现
具体操作步骤是什么
from flask import Flask, request, jsonify
import os
import base64
import subprocess
from flask import Flask, send_file, abort
from pypdf import PdfWriter, PdfReader
from pypdf.generic import RectangleObject
import sys

####################################### Configuration #######################################
pdf2zh = "pdf2zh" # pdf2zh command to invoke: defaults to 'pdf2zh'
thread_num = 4 # number of threads: defaults to 4
port_num = 7860 # port number (the original comment gave 8888 as the default; the value set here is 7860)
service = 'bing' # translation service: defaults to bing
translated_dir = "./translated/" # output directory for translated files (temporary; may be deleted after translation)
config_path = './config.json' # path of the configuration file
######################################################################################

def get_absolute_path(path):
    """Return *path* as an absolute path.

    An already-absolute path is returned unchanged (it is not normalized);
    a relative path is resolved against the current working directory.
    """
    if os.path.isabs(path):
        return path
    return os.path.abspath(path)

def get_file_from_request(request):
    """Resolve the PDF referenced by a JSON request to a local file path.

    The JSON body is read via ``request.get_json()`` and is expected to carry
    ``filePath`` and, optionally, ``fileContent`` (Base64, with or without a
    ``data:application/pdf;base64,`` prefix).

    * If ``filePath`` exists on this machine, it is returned (with
      backslashes converted to forward slashes).
    * Otherwise, the decoded ``fileContent`` is written into
      ``translated_dir`` under the base name of ``filePath`` and that path is
      returned. When no ``fileContent`` is supplied the same path is returned
      without anything being written.

    Raises:
        ValueError: if the request carries no ``filePath``.
    """
    data = request.get_json() or {}
    path = data.get('filePath')
    if not path:
        raise ValueError("request is missing 'filePath'")

    # Convert every backslash to a forward slash (Windows -> Linux/macOS).
    path = path.replace('\\', '/')
    if os.path.exists(path):
        return path

    input_path = os.path.join(translated_dir, os.path.basename(path))
    file_content = data.get('fileContent')
    if file_content:
        # Strip the data-URL prefix from the Base64 payload, if present.
        prefix = 'data:application/pdf;base64,'
        if file_content.startswith(prefix):
            file_content = file_content[len(prefix):]
        file_data = base64.b64decode(file_content)
        # The output directory may not exist yet on the first request.
        os.makedirs(translated_dir, exist_ok=True)
        with open(input_path, 'wb') as f:
            f.write(file_data)
    return input_path

# Flask application object; __name__ lets Flask locate resources relative to this module.
app = Flask(__name__)
@app.route('/translate', methods=['POST'])
def translate():
    """Translate the PDF named in the POSTed JSON body by running pdf2zh.

    Returns a JSON response: on success (HTTP 200) it carries the absolute
    paths of the ``-mono.pdf`` and ``-dual.pdf`` outputs as
    ``translatedPath1`` / ``translatedPath2``; on any failure (HTTP 500) it
    carries ``status: error`` and the exception message.
    """
    print("### translate ###")
    try:
        # Resolved inside the try so a malformed request also yields the JSON error below.
        input_path = get_file_from_request(request)
        os.makedirs(translated_dir, exist_ok=True)
        print("### translating ###: ", input_path)

        # Run the pdf2zh translation; users may customize this command:
        command = [
            pdf2zh,
            input_path,
            '--t', str(thread_num),
            '--output', translated_dir,
            '--service', service,
        ]
        # The exit status is not trusted on its own; success is judged by the
        # presence of the output files below.
        result = subprocess.run(command, check=False)
        if result.returncode != 0:
            print("pdf2zh exit code: ", result.returncode)

        abs_translated_dir = get_absolute_path(translated_dir)
        print("abs_translated_dir: ", abs_translated_dir)

        # Derive output names from the stem so that only the final extension is
        # replaced (str.replace would rewrite every '.pdf' in the name and miss '.PDF').
        stem = os.path.splitext(os.path.basename(input_path))[0]
        translated_path1 = os.path.join(abs_translated_dir, stem + '-mono.pdf')
        translated_path2 = os.path.join(abs_translated_dir, stem + '-dual.pdf')
        if not (os.path.exists(translated_path1) and os.path.exists(translated_path2)):
            raise Exception("pdf2zh failed to generate translated files")
        return jsonify({'status': 'success', 'translatedPath1': translated_path1, 'translatedPath2': translated_path2}), 200
    except Exception as e:
        print(f"Error: {e}")
        return jsonify({'status': 'error', 'message': str(e)}), 500

@app.route('/translatedFile/<filename>')
def download(filename):
    """Send a file from ``translated_dir`` as an attachment.

    Args:
        filename: name of a file directly inside ``translated_dir``, taken
            from the URL.

    Returns:
        The file as a download, or ``("File not found", 404)`` when the name
        contains a path component or no such file exists.
    """
    # Refuse names with directory parts so requests cannot escape translated_dir.
    if os.path.basename(filename) != filename:
        return "File not found", 404
    abs_directory = get_absolute_path(translated_dir)
    file_path = os.path.join(abs_directory, filename)
    if not os.path.isfile(file_path):
        return "File not found", 404
    return send_file(file_path, as_attachment=True, download_name=filename)

# Added a cut-PDF function for splitting two-column PDF files.

def _crop_to_half(page, right_half):
    """Shrink *page*'s media box in place to its left or right half and return the page."""
    box = page.mediabox
    # Honour a non-zero media box origin instead of assuming (0, 0).
    left, bottom, right, top = box.left, box.bottom, box.right, box.top
    middle = left + box.width / 2
    if right_half:
        page.mediabox = RectangleObject((middle, bottom, right, top))
    else:
        page.mediabox = RectangleObject((left, bottom, middle, top))
    return page


def split_and_merge_pdf(input_pdf, output_pdf):
    """Split every page of a two-column PDF into left and right halves.

    When the file name contains ``dual``, pages are processed in pairs and
    written as: left half of the first, left half of the second, right half
    of the first, right half of the second. A trailing unpaired page is
    written as its left half followed by its right half. Otherwise each page
    is written as its left half followed by its right half.

    Args:
        input_pdf: path of the PDF to read.
        output_pdf: path the resulting PDF is written to.
    """
    writer = PdfWriter()
    # Two independent readers: a page's media box is changed in place, so the
    # left and right halves of one page must come from separate page objects.
    left_reader = PdfReader(input_pdf)
    right_reader = PdfReader(input_pdf)
    page_count = len(left_reader.pages)

    # Check only the file name, so a directory called "dual" does not switch modes.
    group_size = 2 if 'dual' in os.path.basename(input_pdf) else 1

    for start in range(0, page_count, group_size):
        # min() keeps the final group in range when the page count is odd.
        indices = range(start, min(start + group_size, page_count))
        for i in indices:
            writer.add_page(_crop_to_half(left_reader.pages[i], right_half=False))
        for i in indices:
            writer.add_page(_crop_to_half(right_reader.pages[i], right_half=True))

    with open(output_pdf, "wb") as output_file:
        writer.write(output_file)

# Added a /cut endpoint for splitting two-column PDF files.

@app.route('/cut', methods=['POST'])
def cut():
    """Split the two-column PDF named in the POSTed JSON body.

    The result is written to ``translated_dir`` as ``<stem>-cut.pdf``.
    Returns ``{'status': 'success'}`` with HTTP 200, or ``status: error`` and
    the exception message with HTTP 500.
    """
    print("### cut ###")
    try:
        # Resolved inside the try so a malformed request also yields the JSON error below.
        input_path = get_file_from_request(request)
        os.makedirs(translated_dir, exist_ok=True)
        print("### cutting ###: ", input_path)
        abs_translated_dir = get_absolute_path(translated_dir)
        # Replace only the final extension (str.replace would rewrite every '.pdf' in the name).
        stem = os.path.splitext(os.path.basename(input_path))[0]
        translated_path = os.path.join(abs_translated_dir, stem + '-cut.pdf')
        split_and_merge_pdf(input_path, translated_path)
        if not os.path.exists(translated_path):
            raise Exception("failed to generate cutted files")
        return jsonify({'status': 'success'}), 200
    except Exception as e:
        print(f"Error: {e}")
        return jsonify({'status': 'error', 'message': str(e)}), 500

# Script entry point: optional command-line arguments override the configuration above.
if __name__ == '__main__':
    if len(sys.argv) > 1:  # command-line argument 1: service
        service = sys.argv[1]
    if len(sys.argv) > 2:  # command-line argument 2: thread_num
        thread_num = int(sys.argv[2])
    # NOTE(review): host 0.0.0.0 listens on all network interfaces — confirm this exposure is intended.
    app.run(host='0.0.0.0', port=port_num)

@guaguastandup
Copy link
Owner

guaguastandup commented Feb 11, 2025

看你贴的代码,你已经修改配置了呀?然后命令行运行这段python代码,挂在后端就好了。

@LiNengxin
Copy link
Author

第一个问题:运行脚本以后(修改部分为8888改为7860),
pycharm运行
`E:\Users\LNX\anaconda3\envs\pdf2zh\python.exe E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\111.py

  • Serving Flask app '111'
  • Debug mode: off
    WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
  • Running on all addresses (0.0.0.0)
  • Running on http://127.0.0.1:7860
  • Running on http://192.168.1.102:7860
    Press CTRL+C to quit但是,浏览器出现Not Found
    The requested URL was not found on the server. If you entered the URL manually please check your spelling and try again.`

Image
第二个问题:在zotero中运行,出现
Image
同时pycharm的运行脚本出现这种错误
`E:\Users\LNX\anaconda3\envs\pdf2zh\python.exe E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\111.py

  • Serving Flask app '111'
  • Debug mode: off
    WARNING: This is a development server. Do not use it in a production deployment. Use a production WSGI server instead.
  • Running on all addresses (0.0.0.0)
  • Running on http://127.0.0.1:7860
  • Running on http://192.168.1.102:7860
    Press CTRL+C to quit

translate

translating ###: G:/OneDrive/Zotero/Storage/IEE IEEE_VTS_Challenge/2023/Pre_2018_Model Based Energy Management and State Estimation for the Robotic Electric Vehicle ROboMObil.pdf

Traceback (most recent call last):
File "E:\Users\LNX\anaconda3\envs\pdf2zh\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "E:\Users\LNX\anaconda3\envs\pdf2zh\lib\runpy.py", line 86, in run_code
exec(code, run_globals)
File "E:\Users\LNX\anaconda3\envs\pdf2zh\Scripts\pdf2zh.exe_main
.py", line 4, in
File "E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\pdf2zh.py", line 14, in
from pdf2zh import version, log
ImportError: cannot import name 'version' from partially initialized module 'pdf2zh' (most likely due to a circular import) (E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\pdf2zh.py)
127.0.0.1 - - [12/Feb/2025 14:25:51] "POST /translate HTTP/1.1" 500 -
abs_translated_dir: E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\translated
Error: pdf2zh failed to generate translated files
127.0.0.1 - - [12/Feb/2025 14:28:37] "GET / HTTP/1.1" 404 -
192.168.1.102 - - [12/Feb/2025 14:30:28] "GET / HTTP/1.1" 404 -

translate

translating ###: G:/OneDrive/Zotero/Storage/IEE IEEE_VTS_Challenge/2023/Pre_2018_Model Based Energy Management and State Estimation for the Robotic Electric Vehicle ROboMObil.pdf

Traceback (most recent call last):
File "E:\Users\LNX\anaconda3\envs\pdf2zh\lib\runpy.py", line 196, in _run_module_as_main
return _run_code(code, main_globals, None,
File "E:\Users\LNX\anaconda3\envs\pdf2zh\lib\runpy.py", line 86, in run_code
exec(code, run_globals)
File "E:\Users\LNX\anaconda3\envs\pdf2zh\Scripts\pdf2zh.exe_main
.py", line 4, in
File "E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\pdf2zh.py", line 14, in
from pdf2zh import version, log
ImportError: cannot import name 'version' from partially initialized module 'pdf2zh' (most likely due to a circular import) (E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\pdf2zh.py)
127.0.0.1 - - [12/Feb/2025 14:32:04] "POST /translate HTTP/1.1" 500 -
abs_translated_dir: E:\Users\LNX\anaconda3\envs\pdf2zh\Lib\site-packages\pdf2zh\translated
Error: pdf2zh failed to generate translated files
`

@guaguastandup
Copy link
Owner

这个不需要在浏览器打开,挂在后台就好了。
另外,你需要先确保一下你本地安装的pdf2zh是可用的。目前的报错是因为pdf2zh翻译失败了。
ImportError: cannot import name 'version' from partially initialized module 'pdf2zh' (most likely due to a circular import)

@LiNengxin
Copy link
Author

我将pdf2zh安装在通过conda创建的虚拟环境里面,如果通过激活conda环境后,使用pdf2zh -i 可以在浏览器正常使用,也能正常翻译。
那现在的问题是不是出在,我将pdf2zh安装在了虚拟环境里面

@guaguastandup
Copy link
Owner

你可以试试打开命令行工具(cmd),先在命令行激活虚拟环境,然后在命令行工具里执行这个脚本试试。

或者尝试另一个方法:
把配置里的pdf2zh = 'pdf2zh'改为pdf2zh = 'conda activate xxx && pdf2zh'试试,xxx是你的虚拟环境名

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
question Further information is requested
Projects
None yet
Development

No branches or pull requests

2 participants