-
Notifications
You must be signed in to change notification settings - Fork 0
/
translate-json.js
171 lines (147 loc) · 7.39 KB
/
translate-json.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
/* eslint-disable no-console */
const fs = require("fs");
const { Translate } = require("@google-cloud/translate").v2;
// Price in USD per one million characters; used only for the cost estimate
// that is printed before the translation starts.
const costPerMillionCharsUsd = 20; // Update this value if the price has changed
// Maximum number of strings sent to the translation client in one request.
const batchSize = 100;
/**
 * Translates a flat list of strings by sending it to the client in
 * fixed-size chunks, one request at a time.
 *
 * @param {string[]} textToTranslate - Strings to translate, in order.
 * @param {string} sourceLang - Language code of the input strings.
 * @param {string} targetLang - Language code to translate into.
 * @param {{ translate: Function }} translate - Client whose
 *   `translate(batch, { from, to })` resolves to an array whose first element
 *   is the list of translated strings for that batch.
 * @param {number} [chunkSize=batchSize] - Maximum number of strings per request.
 * @returns {Promise<string[]>} Translations in the same order as the input.
 * @throws {RangeError} If `chunkSize` is not a positive integer.
 */
const translateInBatches = async (textToTranslate, sourceLang, targetLang, translate, chunkSize = batchSize) => {
  // A zero or negative step would never advance the loop below.
  if (!Number.isInteger(chunkSize) || chunkSize <= 0) {
    throw new RangeError(`Batch size must be a positive integer, received: ${chunkSize}`);
  }

  const translations = [];
  for (let start = 0; start < textToTranslate.length; start += chunkSize) {
    const batch = textToTranslate.slice(start, start + chunkSize);
    console.log(`📤 Sending batch to the API ${sourceLang} -> ${targetLang} ...`);
    // Requests are awaited one after another so results stay in input order.
    const [batchTranslations] = await translate.translate(batch, {
      from: sourceLang,
      to: targetLang,
    });
    translations.push(...batchTranslations);
  }
  return translations;
};
/**
 * Translates every string found anywhere inside a JSON-like value and returns
 * a new value with the same shape.
 *
 * Strings are gathered depth-first, translated via `translateInBatches`, and
 * written back in the same traversal order. Non-string leaves (numbers,
 * booleans, null, ...) are returned unchanged. For the target languages
 * "he", "iw", "ar", "fa" and "ur" each translation is additionally passed
 * through `removeDiacritics`.
 *
 * @param {*} obj - String, array, plain object or any nesting of those.
 * @param {string} sourceLang - Language code of the input strings.
 * @param {string} targetLang - Language code to translate into.
 * @param {object} translate - Translation client forwarded to `translateInBatches`.
 * @returns {Promise<*>} A translated copy of `obj`.
 */
const translateRecursive = async (obj, sourceLang, targetLang, translate) => {
  // Pass 1: collect every string in depth-first order.
  const textToTranslate = [];
  const collectText = (node) => {
    if (typeof node === "string") {
      textToTranslate.push(node);
    } else if (Array.isArray(node)) {
      node.forEach((item) => collectText(item));
    } else if (typeof node === "object" && node !== null) {
      Object.values(node).forEach((value) => collectText(value));
    }
  };
  collectText(obj);

  let translations = await translateInBatches(textToTranslate, sourceLang, targetLang, translate);

  // Remove diacritics for specific languages
  if (["he", "iw", "ar", "fa", "ur"].includes(targetLang)) {
    translations = translations.map((text) => removeDiacritics(text));
  }

  // Pass 2: rebuild the structure. The traversal order matches pass 1, so a
  // running cursor pairs each string with its translation without any
  // per-string search.
  let cursor = 0;
  const applyTranslations = (node) => {
    if (typeof node === "string") {
      const translated = translations[cursor];
      cursor += 1;
      return translated;
    }
    if (Array.isArray(node)) {
      return node.map((item) => applyTranslations(item));
    }
    if (typeof node === "object" && node !== null) {
      return Object.fromEntries(
        Object.entries(node).map(([key, value]) => [key, applyTranslations(value)]),
      );
    }
    return node;
  };
  return applyTranslations(obj);
};
/**
 * Produces one translated copy of `obj` per target language.
 *
 * Languages are handled strictly one after another, and a progress line is
 * logged before and after each language.
 *
 * @param {*} obj - JSON-like value to translate.
 * @param {string} sourceLang - Language code of the input.
 * @param {Iterable<string>} targetLangs - Language codes to translate into.
 * @param {object} translate - Translation client forwarded to `translateRecursive`.
 * @returns {Promise<Object<string, *>>} Map from language code to translated value.
 */
const translateObject = async (obj, sourceLang, targetLangs, translate) => {
  // Translates into a single language, logging before and after the call.
  const translateInto = async (targetLang) => {
    console.log(`🌐 Translating to ${targetLang} from ${sourceLang} ...`);
    const translated = await translateRecursive(obj, sourceLang, targetLang, translate);
    console.log(`📥 Received translation for ${targetLang} from ${sourceLang} ...`);
    return translated;
  };

  const byLanguage = {};
  for (const targetLang of targetLangs) {
    byLanguage[targetLang] = await translateInto(targetLang);
  }
  return byLanguage;
};
/**
 * Sums the `length` of every string found anywhere inside a JSON-like value.
 *
 * Arrays and objects are walked recursively; every other non-string value
 * contributes nothing. Lengths are JavaScript string lengths (UTF-16 code
 * units).
 *
 * @param {*} obj - String, array, object or any nesting of those.
 * @returns {number} Total length of all strings contained in `obj`.
 */
const countCharacters = (obj) => {
  const measure = (node) => {
    if (typeof node === "string") {
      return node.length;
    }
    if (node === null || typeof node !== "object") {
      return 0;
    }
    const children = Array.isArray(node) ? node : Object.values(node);
    return children.reduce((total, child) => total + measure(child), 0);
  };
  return measure(obj);
};
/**
 * Waits on stdin until the user confirms or cancels.
 *
 * Resolves when Enter is pressed. Esc or Ctrl+C prints a cancellation message
 * and terminates the process with exit code 0. Any other key is ignored and
 * the prompt keeps waiting.
 *
 * @returns {Promise<void>} Settles once Enter has been received.
 */
const waitForConfirmation = () =>
  new Promise((resolve) => {
    const { stdin } = process;
    // setRawMode is only available when stdin is attached to a terminal.
    const useRawMode = Boolean(stdin.isTTY);
    if (useRawMode) {
      stdin.setRawMode(true);
    }
    stdin.resume();

    // A persistent listener is required: a one-shot listener would leave the
    // prompt stuck forever after any unrelated key press.
    const onKey = (key) => {
      const [firstByte] = key;
      if (firstByte === 3 || firstByte === 27) {
        // 3 for Ctrl+C, 27 for Esc
        if (useRawMode) {
          stdin.setRawMode(false);
        }
        console.log("\n❌ Translation cancelled.");
        process.exit(0);
      }
      if (firstByte === 13 || firstByte === 10) {
        // 13 (CR) is Enter in raw mode, 10 (LF) is Enter on line-buffered input
        stdin.off("data", onKey);
        if (useRawMode) {
          stdin.setRawMode(false);
        }
        stdin.pause();
        resolve();
      }
    };
    stdin.on("data", onKey);
  });

/**
 * Reads `<inputLang>.json` from the current working directory, translates all
 * of its strings into each target language and writes one `<lang>.json` file
 * per language next to it.
 *
 * Before translating, a character count and a cost estimate are printed. In
 * CLI mode the user must confirm with Enter before any request is sent.
 *
 * @param {string} inputLang - Source language code; also the input file's base name.
 * @param {string} targetLangs - Comma-separated target language codes;
 *   surrounding whitespace and empty entries are ignored.
 * @param {string} apiKey - API key passed to the `Translate` client.
 * @param {boolean} [isCLI=false] - When true, ask for interactive confirmation first.
 * @returns {Promise<void>}
 * @throws {Error} Any failure (reading, parsing, translating, writing) is
 *   logged and then rethrown.
 */
const translateJson = async (inputLang, targetLangs, apiKey, isCLI = false) => {
  const divider = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━";
  try {
    const translate = new Translate({ key: apiKey });
    const inputPath = `${inputLang}.json`;
    const data = JSON.parse(fs.readFileSync(inputPath, "utf-8"));

    // Tolerate input such as "fr, de," so no language code carries stray
    // whitespace into the request or the output file name.
    const targetLangsArray = targetLangs
      .split(",")
      .map((lang) => lang.trim())
      .filter((lang) => lang !== "");
    if (targetLangsArray.length === 0) {
      throw new Error("No target languages were provided.");
    }

    const textLength = countCharacters(data);
    const totalCharacters = textLength * targetLangsArray.length;
    const estimatedCost = ((totalCharacters / 1_000_000) * costPerMillionCharsUsd).toFixed(10);

    console.log("\n🔄 Starting translation process...");
    console.log(divider);
    console.log(`📊 Translation Statistics:`);
    console.log(` • Total characters to be translated: \x1b[1m${textLength.toLocaleString()}\x1b[0m`);
    console.log(` • Number of target languages: \x1b[1m${targetLangsArray.length}\x1b[0m (${targetLangsArray.join(", ")})`);
    console.log(` • Total characters (including all target languages): \x1b[1m${totalCharacters.toLocaleString()}\x1b[0m`);
    console.log(` • Cost per million characters: \x1b[1m$${costPerMillionCharsUsd.toFixed(2)}\x1b[0m`);
    console.log(` • Estimated cost: \x1b[1m$${estimatedCost}\x1b[0m`);
    console.log(divider);

    if (isCLI) {
      console.log("\n┌──────────────────────────────────────────────────┐");
      console.log("│ Press Enter to continue, Esc or Ctrl+C to cancel │");
      console.log("└──────────────────────────────────────────────────┘");
      await waitForConfirmation();
      console.log("\n✅ Continuing with translation...");
    }

    const translations = await translateObject(data, inputLang, targetLangsArray, translate);
    for (const [lang, translatedData] of Object.entries(translations)) {
      const outputPath = `${lang}.json`;
      fs.writeFileSync(outputPath, JSON.stringify(translatedData, null, 2));
      console.log(`✅ Written to ${outputPath}`);
    }

    console.log("\n✅ Translation completed successfully!");
    console.log(`${divider}\n`);
  } catch (error) {
    console.log("\n❌ Translation failed!");
    console.log(divider);
    console.log(`🚫 Error: ${error}`);
    console.log(`${divider}\n`);
    throw error;
  }
};
module.exports = translateJson;
/**
 * Strips Hebrew and Arabic-script combining marks from a string.
 *
 * The text is decomposed (NFD), every code unit in the ranges below is
 * deleted, and the result is recomposed (NFC):
 *   - U+0591–U+05C7 (Hebrew block marks)
 *   - U+0610–U+061A, U+064B–U+065F, U+0670, U+06D6–U+06ED (Arabic block marks)
 *
 * NOTE(review): because decomposition happens first, precomposed letters
 * whose combining part falls in these ranges lose it (e.g. U+0622 becomes
 * U+0627) — confirm this is intended.
 *
 * @param {string} text - Text to clean.
 * @returns {string} `text` without the marks listed above, in NFC form.
 */
function removeDiacritics(text) {
  const combiningMarks = /[\u0591-\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06ED]/g;
  const decomposed = text.normalize("NFD");
  const stripped = decomposed.replace(combiningMarks, "");
  return stripped.normalize("NFC");
}