Skip to content

Commit

Permalink
Add experimental po file pseudo translation
Browse files Browse the repository at this point in the history
  • Loading branch information
Blake-Madden committed Sep 18, 2024
1 parent 10c59e4 commit 596af38
Show file tree
Hide file tree
Showing 5 changed files with 289 additions and 4 deletions.
2 changes: 1 addition & 1 deletion gui/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")
set(FILES ../src/analyze.cpp ../src/gui/i18napp.cpp ../src/gui/projectdlg.cpp ../src/i18n_string_util.cpp
../src/i18n_review.cpp ../src/input.cpp ../src/gui/datamodel.cpp ../src/gui/app_options.cpp
../src/cpp_i18n_review.cpp ../src/po_file_review.cpp ../src/translation_catalog_review.cpp
../src/rc_file_review.cpp)
../src/pseudo_translate.cpp ../src/rc_file_review.cpp)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_SOURCE_DIR}/bin)

if(WIN32)
Expand Down
78 changes: 78 additions & 0 deletions src/analyze.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,84 @@ namespace i18n_check
return std::make_pair(true, buffer);
}

//------------------------------------------------------
/** Pseudo translates a set of files.
    For every gettext PO file in the list, the content is read, run through
    pseudo_translater::po_file(), and written (as UTF-8) next to the original
    with 'pseudo_' prepended to the file name. Files of any other type are skipped.
    @param filesToTranslate The files to translate.
    @param callback Progress callback. It is called once per file (before the file
        is processed) with the 1-based file index, the overall file count,
        and the file's path. Returning @c false stops the whole operation.*/
void pseudo_translate(const std::vector<std::wstring>& filesToTranslate,
                      analyze_callback callback)
{
    size_t currentFileIndex{ 0 };

    // Converts the wide content to UTF-8 and writes it to filePath.
    // Both arguments are taken by reference so that the (potentially large)
    // file buffer is not copied for every file that is written.
    const auto outputFile = [](const std::filesystem::path& filePath, const std::wstring& content)
    {
        // NOTE(review): each wchar_t is widened to char32_t as-is; where wchar_t is
        // 16 bits, surrogate pairs would not be combined here -- confirm.
        std::u32string outBuffer;
        outBuffer.reserve(content.size());
        for (const auto& chr : content)
        {
            outBuffer += static_cast<char32_t>(chr);
        }
        const std::string utfBuffer{ utf8::utf32to8(outBuffer) };

        // NOTE(review): the stream is opened in text mode, so platform newline
        // translation applies to the written content -- confirm this is intended.
        std::ofstream out(filePath);
        if (out.is_open())
        {
            out.write(utfBuffer.c_str(), utfBuffer.size());
        }
    };

    pseudo_translater trans;

    for (const auto& file : filesToTranslate)
    {
        if (!callback(++currentFileIndex, filesToTranslate.size(), file))
        {
            return;
        }

        // Only PO catalogs are translated; skip everything else up front
        // rather than reading and decoding a file that will then be discarded.
        if (get_file_type(file) != file_review_type::po)
        {
            continue;
        }

        std::filesystem::path outPath{ file };
        outPath.replace_filename(L"pseudo_" + outPath.filename().generic_wstring());

        try
        {
            bool startsWithBom{ false };
            if (auto [readUtf8Ok, fileUtf8Text] = read_utf8_file(file, startsWithBom);
                readUtf8Ok)
            {
                trans.po_file(fileUtf8Text);
                outputFile(outPath, fileUtf8Text);
            }
            else if (auto [readUtf16Ok, fileUtf16Text] = read_utf16_file(file); readUtf16Ok)
            {
                // UTF-16 may not be supported consistently on all platforms and compilers
                trans.po_file(fileUtf16Text);
                outputFile(outPath, fileUtf16Text);
            }
            else
            {
                // Last resort: read the file through a wide stream. The stream is opened
                // from the path itself (as the writer above does) instead of a narrowed
                // copy of it, so the file name is not pushed through a lossy conversion.
                std::wifstream ifs(std::filesystem::path{ file });
                std::wstring str((std::istreambuf_iterator<wchar_t>(ifs)),
                                 std::istreambuf_iterator<wchar_t>());
                trans.po_file(str);
                outputFile(outPath, str);
            }
        }
        catch (const std::exception& expt)
        {
            // report the problem and carry on with the remaining files
            std::wcout << i18n_string_util::lazy_string_to_wstring(expt.what()) << L"\n";
        }
    }
}

//------------------------------------------------------
void analyze(const std::vector<std::wstring>& filesToAnalyze, i18n_check::cpp_i18n_review& cpp,
i18n_check::rc_file_review& rc, i18n_check::po_file_review& po,
Expand Down
19 changes: 16 additions & 3 deletions src/analyze.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "cpp_i18n_review.h"
#include "i18n_string_util.h"
#include "po_file_review.h"
#include "pseudo_translate.h"
#include "rc_file_review.h"
#include "unicode_extract_text.h"
#include "utfcpp/source/utf8.h"
Expand All @@ -34,7 +35,8 @@ namespace i18n_check
/// @private
bool valid_utf8_file(const std::filesystem::path& filePath, bool& startsWithBom);
/// @private
std::pair<bool, std::wstring> read_utf8_file(const std::filesystem::path& filePath, bool& startsWithBom);
std::pair<bool, std::wstring> read_utf8_file(const std::filesystem::path& filePath,
bool& startsWithBom);

/** @brief Runs all analyzers on a set of files.
@param filesToAnalyze The files to analyze.
Expand All @@ -46,11 +48,22 @@ namespace i18n_check
Windows UTF-8 file signature.
@param callback Callback function to display the progress.
Takes the current file index, overall file count, and the name of the current file.
Returning @c indicates that the user cancelled the analysis.*/
Returning @c false indicates that the user cancelled the analysis.*/
void analyze(const std::vector<std::wstring>& filesToAnalyze, i18n_check::cpp_i18n_review& cpp,
i18n_check::rc_file_review& rc, i18n_check::po_file_review& po,
std::vector<std::wstring>& filesThatShouldBeConvertedToUTF8,
std::vector<std::wstring>& filesThatContainUTF8Signature, analyze_callback callback);
std::vector<std::wstring>& filesThatContainUTF8Signature,
analyze_callback callback);

/** @brief Pseudo translates a set of files.
@details A copy of each translated file is written to the same folder with
'pseudo_' prepended to the file name. The original files are not modified.
Only gettext PO files are translated; files of other types produce no output.
@param filesToTranslate The files to translate.
@param callback Callback function to display the progress.
Takes the current file index, overall file count, and the name of the current file.
Returning @c false indicates that the user cancelled the translation;
no further files are processed after that.*/
void pseudo_translate(const std::vector<std::wstring>& filesToTranslate,
analyze_callback callback);

/** @returns A formatted summary of the results.
@param[in,out] cpp The C++ analyzer that was used.
Expand Down
145 changes: 145 additions & 0 deletions src/pseudo_translate.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
///////////////////////////////////////////////////////////////////////////////
// Name: pseudo_translate.cpp
// Author: Blake Madden
// Copyright: (c) 2021-2024 Blake Madden
// License: 3-Clause BSD license
// SPDX-License-Identifier: BSD-3-Clause
///////////////////////////////////////////////////////////////////////////////

#include "pseudo_translate.h"

namespace i18n_check
{
//------------------------------------------------
/** Pseudo translates a gettext PO catalog in place.
    Walks the catalog entry by entry; for every entry that has both a `msgid`
    and a `msgstr`, the `msgstr` value is replaced with a mutated copy of the
    `msgid` (according to the active pseudo_translation_method).
    @param[in,out] poFileText The PO file's text to translate.
    @note Only the singular `msgid`/`msgstr` pair is handled; plural forms
        (`msgid_plural`, `msgstr[n]`) are left untouched.*/
void pseudo_translater::po_file(std::wstring& poFileText) const
{
    if (poFileText.empty())
    {
        return;
    }

    static const std::wstring_view MSGID{ L"msgid \"" };
    static const std::wstring_view MSGSTR{ L"msgstr \"" };

    // offset (into poFileText) from which the current search window starts
    size_t currentPosition{ 0 };

    std::wstring_view fileContent{ poFileText };

    auto [foundEntry, entryContent, entryPos] = i18n_review::read_po_catalog_entry(fileContent);
    while (foundEntry)
    {
        // step to start of catalog entry
        currentPosition += entryPos;

        // Capture the entry's original length before poFileText is modified,
        // in case entryContent is a view into its buffer.
        const size_t entryLength{ entryContent.length() };
        // how much the entry grew (positive) or shrank (negative) after translation
        int64_t alteredLengthDiff{ 0 };

        std::wstring_view msgIdEntry{ entryContent };
        const auto [foundMsgId, msgIdContent, msgIdPos, msgIdLen] =
            i18n_review::read_po_msg(msgIdEntry, MSGID);
        if (foundMsgId)
        {
            std::wstring_view msgStrEntry{ entryContent };
            const auto [foundMsgStr, msgStrContent, msgStrPos, msgStrLen] =
                i18n_review::read_po_msg(msgStrEntry, MSGSTR);
            if (foundMsgStr && m_transType == pseudo_translation_method::all_caps)
            {
                const std::wstring translated = mutate_message_caps(msgIdContent);
                // Measure the size change from what is actually inserted, using
                // signed arithmetic so that a shrinking entry yields a negative delta.
                alteredLengthDiff = static_cast<int64_t>(translated.length()) -
                                    static_cast<int64_t>(msgStrLen);
                poFileText.replace(currentPosition + msgStrPos + MSGSTR.length(), msgStrLen,
                                   translated);
            }
        }

        // Step to the end of the (possibly resized) catalog entry and look for the next one.
        // The size change must be folded into currentPosition itself; otherwise every
        // later entry offset would be off by the accumulated difference.
        currentPosition = static_cast<size_t>(
            static_cast<int64_t>(currentPosition + entryLength) + alteredLengthDiff);
        fileContent = std::wstring_view{ poFileText }.substr(currentPosition);
        std::tie(foundEntry, entryContent, entryPos) =
            i18n_review::read_po_catalog_entry(fileContent);
    }
}

//------------------------------------------------
/** Builds an uppercased copy of a message.
    Alphabetic characters are converted to uppercase; every other character is
    copied as-is. The spans reported by load_cpp_printf_command_positions()
    are copied verbatim so that they are not altered.
    @param msg The message to mutate.
    @returns The mutated copy of @c msg.*/
std::wstring pseudo_translater::mutate_message_caps(const std::wstring& msg)
{
    // (start position, length) pairs of the spans that must be left untouched
    const auto protectedSpans = i18n_review::load_cpp_printf_command_positions(msg);

    std::wstring upperCased;
    upperCased.reserve(msg.size());

    size_t index{ 0 };
    while (index < msg.length())
    {
        const auto spanAtIndex =
            std::find_if(protectedSpans.cbegin(), protectedSpans.cend(),
                         [index](const auto& span) noexcept { return span.first == index; });

        if (spanAtIndex == protectedSpans.cend())
        {
            // ordinary character: uppercase it if it is a letter
            const wchar_t current{ msg[index] };
            upperCased +=
                std::iswalpha(current) ? static_cast<wchar_t>(std::towupper(current)) : current;
            ++index;
            continue;
        }

        // a protected span begins here: copy it unchanged and jump past it
        upperCased.append(msg, index, spanAtIndex->second);
        index += spanAtIndex->second;
    }

    return upperCased;
}

//------------------------------------------------
/** Finds the closing quote of a (possibly multi-line) PO message string.
    Escaped quotes are skipped. If the line following a closing quote starts
    (after optional spaces/tabs) with another quote, the string is treated as
    continuing and the search resumes inside that next segment.
    @param poFileText The PO file's text.
    @param startPos Where to start searching from.
    @returns The position of the quote that ends the message,
        or @c npos if no such quote was found.*/
size_t pseudo_translater::find_po_msg_end(const std::wstring& poFileText, const size_t startPos)
{
    size_t idEndPos{ startPos };
    while (true)
    {
        idEndPos = poFileText.find(L'\"', idEndPos);
        if (idEndPos == std::wstring::npos)
        {
            return std::wstring::npos;
        }

        // A quote is escaped only if it is preceded by an ODD number of backslashes;
        // an even run (e.g., the \\ in "C:\\") is made of escaped backslashes and
        // leaves the quote itself unescaped.
        size_t backslashCount{ 0 };
        while (backslashCount < idEndPos &&
               poFileText[idEndPos - 1 - backslashCount] == L'\\')
        {
            ++backslashCount;
        }
        if (backslashCount % 2 == 1)
        {
            // skip escaped quote
            ++idEndPos;
            continue;
        }

        size_t lookAheadIndex{ idEndPos + 1 };
        // jump to next line
        while (lookAheadIndex < poFileText.length() &&
               string_util::is_either(poFileText[lookAheadIndex], L'\r', L'\n'))
        {
            ++lookAheadIndex;
        }
        // eat up leading spaces
        while (lookAheadIndex < poFileText.length() &&
               string_util::is_either(poFileText[lookAheadIndex], L'\t', L' '))
        {
            ++lookAheadIndex;
        }
        // if a quote, then this is still part of the same string
        if (lookAheadIndex < poFileText.length() && poFileText[lookAheadIndex] == L'"')
        {
            idEndPos = lookAheadIndex + 1;
            continue;
        }

        return idEndPos;
    }
}
} // namespace i18n_check
49 changes: 49 additions & 0 deletions src/pseudo_translate.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
/** @addtogroup Internationalization
@brief i18n classes.
@date 2021-2024
@copyright Blake Madden
@author Blake Madden
@details This program is free software; you can redistribute it and/or modify
it under the terms of the 3-Clause BSD License.
SPDX-License-Identifier: BSD-3-Clause
@{*/

#ifndef __PSEUDO_FILE_REVIEW_H__
#define __PSEUDO_FILE_REVIEW_H__

#include "i18n_review.h"
#include <map>
#include <regex>
#include <set>
#include <string>
#include <utility>
#include <vector>

namespace i18n_check
{
    /// @brief Strategies that can be used to pseudo translate a message.
    enum class pseudo_translation_method
    {
        /// @brief Convert every letter in the message to uppercase.
        all_caps
    };

    /** @brief Pseudo translates the content of translation files.*/
    class pseudo_translater
    {
      public:
        /** @brief Pseudo translates the text of a gettext PO file.
            @param[in,out] poFileText The PO file's text; it is modified in place.*/
        void po_file(std::wstring& poFileText) const;

      private:
        /// @brief The mutation applied to messages; currently uppercasing is the only option.
        pseudo_translation_method m_transType{ pseudo_translation_method::all_caps };

        /// @returns An uppercased copy of @c msg.
        /// @param msg The message to mutate.
        static std::wstring mutate_message_caps(const std::wstring& msg);

        /// @returns The position of the quote ending a (possibly multi-line) PO message,
        ///     or @c npos if none is found.
        /// @param poFileText The PO file's text.
        /// @param startPos The position to begin searching from.
        static size_t find_po_msg_end(const std::wstring& poFileText, size_t startPos);
    };
} // namespace i18n_check

/** @}*/

#endif // __PSEUDO_FILE_REVIEW_H__

0 comments on commit 596af38

Please sign in to comment.