macOS 如何自动翻译一本书

假设书的原始内容来自 pdf,首先通过 pdftotext 获取 pdf 的文本内容,储存为 book.txt:

1
pdftotext book.pdf txt/book.txt

使用 python 配合 DeepLX,储存翻译后的结果为 translated_book.txt:

1
python trans_book.py --i txt/book.txt --o txt/translated_book.txt

这是 trans_book.py 的内容,其中 https.txt 是可用的 DeepLX 服务地址列表(每行一个,以 https:// 开头):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import requests
import os
import random
import argparse
from datetime import datetime
import time

# URL of a translation service on localhost (presumably a local DeepLX instance).
LOCAL_API_URL = "http://localhost:1188/translate"
# Placeholder bearer token; sent in the Authorization header of every request.
API_TOKEN = "your_access_token"
# Directory containing this script (not referenced elsewhere in the visible code).
current_path = os.path.dirname(os.path.realpath(__file__))
# Text file listing server URLs, one per line. It is opened by this bare name,
# so it is looked up relative to the current working directory.
SERVERS_FILE = "https.txt"
# When False, the servers listed in SERVERS_FILE are added to the candidates.
USE_LOCAL_ONLY = False
# Number of passes over the whole server list before a text is given up on.
MAX_RETRIES = 3
# Pause between two passes over the server list.
RETRY_DELAY = 5 # seconds
# Per-request timeout handed to requests.post.
TIMEOUT = 30 # seconds

def load_servers():
    """Return the server URLs listed in SERVERS_FILE.

    Each line is stripped of surrounding whitespace; only lines that then
    start with ``https://`` are kept, in file order.

    Returns:
        A list of URL strings. If SERVERS_FILE cannot be opened or read, a
        notice is printed and an empty list is returned, so callers see
        "no servers" instead of an unhandled exception.
    """
    try:
        # Explicit encoding so the result does not depend on the platform default.
        with open(SERVERS_FILE, 'r', encoding='utf-8') as f:
            stripped = (line.strip() for line in f)
            return [url for url in stripped if url.startswith('https://')]
    except OSError as e:
        print(f"Could not read {SERVERS_FILE}: {e}")
        return []

def save_servers(servers):
    """Overwrite SERVERS_FILE with the given server URLs, one per line."""
    with open(SERVERS_FILE, 'w') as out:
        out.writelines(f"{url}\n" for url in servers)

def _translate_endpoint(server):
    """Return the URL to POST to for *server*, adding '/translate' only if missing."""
    base = server.rstrip('/')
    return base if base.endswith('/translate') else f"{base}/translate"


def translate_text(text, source_lang, target_lang="ZH"):
    """Translate *text* using the first server that returns a non-empty result.

    The candidate list is LOCAL_API_URL plus, unless USE_LOCAL_ONLY is set,
    the servers from load_servers(), in random order. The whole list is tried
    up to MAX_RETRIES times, pausing RETRY_DELAY seconds between passes.

    Args:
        text: The text to translate.
        source_lang: Source language code sent as ``source_lang``.
        target_lang: Target language code sent as ``target_lang``.

    Returns:
        ``(result, server)`` where *result* is the decoded JSON response (a
        dict with a non-empty ``"data"`` entry) and *server* is the list entry
        that produced it, or ``(None, None)`` if every attempt failed.
    """
    # The local service is plain http://, so accept both schemes here;
    # requiring https:// silently dropped it and left USE_LOCAL_ONLY with
    # no server at all.
    servers = [LOCAL_API_URL] if LOCAL_API_URL.startswith(('http://', 'https://')) else []
    if not USE_LOCAL_ONLY:
        servers += load_servers()
        random.shuffle(servers)

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_TOKEN}"
    }
    data = {
        "text": text,
        "source_lang": source_lang,
        "target_lang": target_lang
    }

    if not servers:
        print("No servers available. Aborting.")
    else:
        for retry in range(MAX_RETRIES):
            for server in servers:
                try:
                    response = requests.post(
                        _translate_endpoint(server),
                        json=data,
                        headers=headers,
                        timeout=TIMEOUT,
                    )
                    response.raise_for_status()
                    result = response.json()
                except requests.exceptions.RequestException as e:
                    print(f"Failed to connect to {server}: {e}")
                    continue
                except ValueError as e:
                    # Body was not valid JSON (raised as ValueError by some
                    # requests versions); treat it like any other bad server.
                    print(f"Server {server} returned invalid JSON: {e}")
                    continue

                if isinstance(result, dict) and result.get("data"):
                    print(f"Successfully translated using {server}")
                    return result, server
                print(f"Server {server} returned empty result. Trying next server.")

            # Only announce and wait when another pass will actually follow.
            if retry + 1 < MAX_RETRIES:
                print(f"All servers failed. Retrying in {RETRY_DELAY} seconds... (Attempt {retry + 1}/{MAX_RETRIES})")
                time.sleep(RETRY_DELAY)

    print("Max retries reached or no servers available. Translation failed.")
    return None, None

def remove_unavailable_server(unavailable_server):
    """Delete *unavailable_server* from SERVERS_FILE if it is listed there."""
    known = load_servers()
    if unavailable_server not in known:
        return
    known.remove(unavailable_server)
    save_servers(known)
    print(f"Removed unavailable server {unavailable_server} from {SERVERS_FILE}")

def save_translation(original, translated, server, output_file):
    """Append a source text and its translation to *output_file* as HTML paragraphs.

    Args:
        original: The source text.
        translated: The translated text.
        server: Accepted for interface compatibility; not written to the file.
        output_file: Path of the UTF-8 file to append to.
    """
    import html  # local import: only this function needs it

    with open(output_file, "a", encoding="utf-8") as f:
        for text in (original, translated):
            # Escape &, < and > so book text cannot be mistaken for markup;
            # quotes are left alone because the text is element content.
            f.write(f"<p>{html.escape(str(text), quote=False)}</p>\n")

def split_into_paragraphs(text, max_length=1000):
    """Group the lines of *text* into chunks of at most about *max_length* characters.

    Lines are never split: a chunk is closed as soon as adding the next line
    would push it past *max_length*, so a single line longer than
    *max_length* becomes a chunk of its own. Each chunk is stripped of
    surrounding whitespace, and chunks that are empty after stripping are
    dropped so that blank stretches of the input are not sent for translation.

    Args:
        text: The full input text.
        max_length: Size threshold for one chunk, counted in characters
            including the newline after each line.

    Returns:
        A list of non-empty chunk strings in input order; ``[]`` when *text*
        is empty or contains only whitespace.
    """
    chunks = []
    pending = []      # lines collected for the chunk being built
    pending_len = 0   # their total length, counting one newline per line

    for line in text.split('\n'):
        if pending and pending_len + len(line) > max_length:
            chunks.append("\n".join(pending).strip())
            pending, pending_len = [], 0
        pending.append(line)
        pending_len += len(line) + 1

    if pending:
        chunks.append("\n".join(pending).strip())

    return [chunk for chunk in chunks if chunk]

def main(source_lang, input_file, output_file):
    """Translate *input_file* chunk by chunk, appending each result to *output_file*.

    Chunks whose translation fails are reported on stdout and skipped.
    """
    banner = (
        f"Translating from {source_lang} to Chinese.",
        f"Input file: {input_file}",
        f"Output file: {output_file}",
        f"Using local service only: {'Yes' if USE_LOCAL_ONLY else 'No'}",
        "",
    )
    for message in banner:
        print(message)

    with open(input_file, 'r', encoding='utf-8') as src:
        book_text = src.read()

    chunks = split_into_paragraphs(book_text)
    total = len(chunks)

    for index, chunk in enumerate(chunks, start=1):
        print(f"Translating paragraph {index}/{total}")

        result, server = translate_text(chunk, source_lang)
        if not result or "data" not in result:
            print(f"Translation failed for paragraph {index}. Skipping...")
            continue

        save_translation(chunk, result["data"], server, output_file)
        print(f"Paragraph {index} translated successfully using {server}")

    print("Translation completed. Results saved to", output_file)

if __name__ == "__main__":
    # Command-line entry point: --lang, --i (input path) and --o (output path).
    cli = argparse.ArgumentParser(description="Book Translator")
    option_specs = (
        ("--lang", "AUTO", "Source language (default: AUTO)"),
        ("--i", "txt/book.txt", "Input file path"),
        ("--o", "txt/translated_book.txt", "Output file path"),
    )
    for flag, default_value, help_text in option_specs:
        cli.add_argument(flag, default=default_value, help=help_text)
    options = cli.parse_args()
    main(options.lang, options.i, options.o)

然后使用 pandoc 转换翻译结果为 book.html:

1
pandoc txt/translated_book.txt -o web/book.html --css=tokyo.css -t html --template=template.html --metadata title="Book Title"

html 的默认样式表 tokyo.css 类似如下,灵感来自 nvim 的 tokyonight 主题:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
/* Dark reading theme for the generated HTML (TokyoNight-inspired). */

/* Page: dark background, generous line height, justified text. */
body {
  background-color: #282c34;
  color: #abb2bf;
  font-family: "Fira Code", monospace;
  line-height: 1.6;
  text-align: justify;
  padding: 12px 24px;
}

/* Links. */
a { color: #81a1c1; }
a:hover { color: #88c0d0; }

/* Headings. */
h1, h2, h3, h4, h5, h6 {
  color: #e06c75;
}

/* Inline code and code blocks; the small padding keeps text off the edge. */
code, pre {
  background-color: #373b41;
  color: #abb2bf;
  padding: 0.2em;
}

/* Syntax-highlighting token classes. */
.comment { color: #5c6370; }
.string { color: #98c379; }
.keyword { color: #c678dd; }
.number { color: #d19a66; }
.function { color: #61aeee; }
.boolean { color: #c678dd; }
.operator { color: #abb2bf; }
.punctuation { color: #abb2bf; }

/* Highlighted lines. */
.highlight { background-color: #44475a; }

template.html 模板类似如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
<!doctype html>
<html lang="zh-CN">
  <head>
    <meta charset="UTF-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <meta name="robots" content="noindex,nofollow" />
    <title>$title$</title>
    <!-- pandoc's css variable is a list: emit one link per stylesheet, none if unset -->
$for(css)$
    <link rel="stylesheet" href="$css$" />
$endfor$
  </head>
  <body>
$body$
  </body>
</html>

最后还可以使用 pandoc 转换 html 为 epub 在手机上阅读:

1
pandoc web/book.html -o epub/book.epub --metadata-file=metadata.yml --css=book.css

metadata.yml:

1
2
3
4
5
6
---
# Metadata passed to pandoc with --metadata-file when building the EPUB.
title: "Book Title"
author: "The Author"
# Cover image file; presumably resolved relative to where pandoc is run (confirm).
cover-image: "book_cover.png"
# Stylesheet for the EPUB; the same file is also given on the command line via --css.
css: "book.css"
---

book.css:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
body {
  /* Family names are listed unquoted: quoting the whole list turns it into
     one non-existent family name, so neither font would be applied. */
  font-family: Georgia, serif; /* default font */
  line-height: 1.6; /* line height */
  /* background-color: #d5d6db; */
  /* color: #2e3338; */
}

p {
  /* CSS comments cannot be nested, so each disabled declaration gets a
     single flat comment. */
  /* margin-bottom: 1em; (adds some spacing between paragraphs) */
  /* color: #2e3338; */
}

h1,
h2,
h3,
h4,
h5,
h6 {
  /* Unquoted list: quoting "Arial, sans-serif" as a whole would make it a
     single non-existent family name. */
  font-family: Arial, sans-serif; /* heading font */
  color: #2e3338;
}