Skip to content

Commit

Permalink
Proper fix utf8 command line arguments (#14253)
Browse files Browse the repository at this point in the history
#14197
Tried to fix the UTF-8 issue, but it didn't handle multibyte chars.
The only way I found that works consistently is using `CommandLineToArgvW`.
To avoid rippling `wchar_t` through the code, I convert to and from it where needed.

Closes #14253

COPYBARA_INTEGRATE_REVIEW=#14253 from hknielsen:proper-fix-none-ascii-issue cad753e
FUTURE_COPYBARA_INTEGRATE_REVIEW=#14253 from hknielsen:proper-fix-none-ascii-issue cad753e
PiperOrigin-RevId: 599826579
  • Loading branch information
hknielsen authored and copybara-github committed Jan 20, 2024
1 parent f0ccf26 commit e67b009
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 3 deletions.
1 change: 1 addition & 0 deletions build_defs/cpp_opts.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ LINK_OPTS = select({
"//build_defs:config_msvc": [
# Suppress linker warnings about files with no symbols defined.
"-ignore:4221",
"Shell32.lib",
],
"@platforms//os:macos": [
"-lpthread",
Expand Down
17 changes: 15 additions & 2 deletions src/google/protobuf/compiler/command_line_interface.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1766,10 +1766,23 @@ bool CommandLineInterface::MakeInputsBeProtoPathRelative(


bool CommandLineInterface::ExpandArgumentFile(
const std::string& file, std::vector<std::string>* arguments) {
const char* file, std::vector<std::string>* arguments) {
// On Windows, to make ifstream handle UTF-8 file names properly, we need to
// convert the name to a wide string (wstring). If we don't, the file can't be opened.
#ifdef _MSC_VER
// Convert the file name to wide chars.
int size = MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), NULL, 0);
std::wstring file_str;
file_str.resize(size);
MultiByteToWideChar(CP_UTF8, 0, file, strlen(file), &file_str[0],
file_str.size());
#else
std::string file_str(file);
#endif

// The argument file is searched in the working directory only. We don't
// use the proto import path here.
std::ifstream file_stream(file.c_str());
std::ifstream file_stream(file_str.c_str());
if (!file_stream.is_open()) {
return false;
}
Expand Down
2 changes: 1 addition & 1 deletion src/google/protobuf/compiler/command_line_interface.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ class PROTOC_EXPORT CommandLineInterface {

// Read an argument file and append the file's content to the list of
// arguments. Return false if the file cannot be read.
bool ExpandArgumentFile(const std::string& file,
bool ExpandArgumentFile(const char* file,
std::vector<std::string>* arguments);

// Parses a command-line argument into a name/value pair. Returns
Expand Down
26 changes: 26 additions & 0 deletions src/google/protobuf/compiler/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
#include <string>
#include <vector>

// Must be included last.
#include "google/protobuf/port_def.inc"

#ifdef _MSC_VER
#include <windows.h>
#endif

namespace google {
namespace protobuf {
namespace compiler {
Expand Down Expand Up @@ -101,6 +105,28 @@ int ProtobufMain(int argc, char* argv[]) {
} // namespace protobuf
} // namespace google

#ifdef _MSC_VER
// Converts the NUL-terminated wide string `input` into a UTF-8 encoded
// std::string via WideCharToMultiByte. The terminator is not part of the
// converted range (the length comes from wcslen). Returns an empty string
// when the API reports that zero bytes are required.
std::string ToMultiByteUtf8String(const wchar_t* input) {
  const int wide_length = wcslen(input);

  // Sizing pass: with no destination buffer the call only reports how many
  // bytes the UTF-8 form needs.
  const int utf8_size = WideCharToMultiByte(CP_UTF8, 0, input, wide_length,
                                            nullptr, 0, nullptr, nullptr);

  std::string utf8(utf8_size, '\0');
  if (utf8_size != 0) {
    // Conversion pass: write directly into the string's storage.
    WideCharToMultiByte(CP_UTF8, 0, input, wide_length, &utf8[0], utf8_size,
                        nullptr, nullptr);
  }
  return utf8;
}

int main(int argc, char* argv[]) {
wchar_t** wargv = CommandLineToArgvW(GetCommandLineW(), &argc);
char** argv_mbcs = new char*[argc];
for (int i = 0; i < argc; i++) {
std::string* multibyte_string = new auto(ToMultiByteUtf8String(wargv[i]));
argv_mbcs[i] = const_cast<char*>(multibyte_string->c_str());
}
return google::protobuf::compiler::ProtobufMain(argc, argv);
}
#else
// Entry point for non-MSVC builds: forwards the process arguments unchanged
// to ProtobufMain and uses its return value as the exit status.
int main(int argc, char* argv[]) {
  const int exit_code = google::protobuf::compiler::ProtobufMain(argc, argv);
  return exit_code;
}
#endif

0 comments on commit e67b009

Please sign in to comment.