Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid reallocations in base64 encoding #6951

Merged
merged 1 commit into from
Jun 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
- NodeJS:
- CHANGED: Use node-api instead of NAN. [#6452](https://github.com/Project-OSRM/osrm-backend/pull/6452)
- Misc:
- CHANGED: Avoid reallocations in base64 encoding. [#6951](https://github.com/Project-OSRM/osrm-backend/pull/6951)
- CHANGED: Get rid of unused Boost dependencies. [#6960](https://github.com/Project-OSRM/osrm-backend/pull/6960)
- CHANGED: Apply micro-optimisation for Table & Trip APIs. [#6949](https://github.com/Project-OSRM/osrm-backend/pull/6949)
- CHANGED: Apply micro-optimisation for Route API. [#6948](https://github.com/Project-OSRM/osrm-backend/pull/6948)
Expand Down
27 changes: 16 additions & 11 deletions include/engine/base64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,29 @@ namespace engine
// Encodes a chunk of memory to Base64.
//
// first: pointer to the first byte to encode.
// size:  number of bytes to encode; asserted to be greater than zero.
//
// Returns the encoded text followed by zero, one or two '=' characters so
// that its length is (size + 2) / 3 * 4.
inline std::string encodeBase64(const unsigned char *first, std::size_t size)
{
    BOOST_ASSERT(size > 0);

    // Reserve the final length up front (four output characters per started
    // group of three input bytes) so the appends below never reallocate.
    std::string encoded;
    encoded.reserve(((size + 2) / 3) * 4);

    // Number of '=' characters needed to complete the last group of four.
    const std::size_t padding = (3 - size % 3) % 3;

    BOOST_ASSERT(padding == 0 || padding == 1 || padding == 2);

    // Build the end iterator once instead of on every loop comparison.
    const auto last = detail::Base64FromBinary(first + size);
    for (auto itr = detail::Base64FromBinary(first); itr != last; ++itr)
    {
        encoded.push_back(*itr);
    }

    encoded.append(padding, '=');

    BOOST_ASSERT(encoded.size() == (size + 2) / 3 * 4);

    return encoded;
}

// C++11 standard 3.9.1/1: Plain char, signed char, and unsigned char are three distinct types
Expand Down
94 changes: 94 additions & 0 deletions unit_tests/engine/base64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,98 @@ BOOST_AUTO_TEST_CASE(hint_encoding_decoding_roundtrip_bytewise)
reinterpret_cast<const unsigned char *>(&decoded)));
}

// Round-trips a string of 1000 'A' characters through encodeBase64 and
// decodeBase64 and expects the original text back.
BOOST_AUTO_TEST_CASE(long_string_encoding)
{
    using namespace osrm::engine;

    const std::string original(1000, 'A');
    const std::string encoded = encodeBase64(original);
    const std::string round_tripped = decodeBase64(encoded);

    BOOST_CHECK_EQUAL(round_tripped, original);
}

// Decoding text that contains '!' (a character outside the Base64 alphabet)
// is expected to raise an exception derived from std::exception.
BOOST_AUTO_TEST_CASE(invalid_base64_decoding)
{
    using namespace osrm::engine;

    const std::string malformed{"Invalid!"};
    BOOST_CHECK_THROW(decodeBase64(malformed), std::exception);
}

// Serialises a hint built from a default-constructed phantom node and the
// mock facade's checksum, and checks the length of the Base64 text.
BOOST_AUTO_TEST_CASE(hint_serialization_size)
{
    using namespace osrm::engine;
    using namespace osrm::util;

    const PhantomNode phantom;
    const osrm::test::MockDataFacade<osrm::engine::routing_algorithms::ch::Algorithm> facade{};

    const SegmentHint hint{phantom, facade.GetCheckSum()};
    const auto base64 = hint.ToBase64();

    // Compare against a std::size_t so both operands are unsigned; a bare
    // `112` mixes signed and unsigned operands inside the check macro.
    BOOST_CHECK_EQUAL(base64.size(), std::size_t{112});
}

// Round-trips a varied set of inputs and checks the shape of the encoded
// text: alphabet, total length and the exact amount of '=' padding.
BOOST_AUTO_TEST_CASE(extended_roundtrip_tests)
{
    using namespace osrm::engine;

    const std::vector<std::string> test_strings = {
        "Hello, World!",                    // Simple ASCII string
        "1234567890",                       // Numeric string
        "!@#$%^&*()_+",                     // Special characters
        std::string(1000, 'A'),             // Long repeating string
        "¡Hola, mundo!",                    // Non-ASCII characters
        "こんにちは、世界!",                // Unicode characters
        std::string("\x00\x01\x02\x03", 4), // Binary data
        "a",                                // Single character
        "ab",                               // Two characters
        "abc",                              // Three characters (no padding in Base64)
        std::string(190, 'x')               // String that doesn't align with Base64 padding
    };

    for (const auto &test_str : test_strings)
    {
        const std::string encoded = encodeBase64(test_str);
        const std::string decoded = decodeBase64(encoded);
        BOOST_CHECK_EQUAL(decoded, test_str);

        // Only characters of the standard Base64 alphabet (plus padding) may appear.
        BOOST_CHECK(encoded.find_first_not_of(
                        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=") ==
                    std::string::npos);

        // Every started group of three input bytes yields four output characters.
        BOOST_CHECK_EQUAL(encoded.size(), (test_str.size() + 2) / 3 * 4);

        // The number of trailing '=' must be exactly what is needed to complete
        // the last group: none when the input length is a multiple of three,
        // otherwise one or two. find_last_not_of returns npos when no other
        // character exists; npos + 1 wraps to 0, so the whole string counts.
        const std::size_t expected_padding = (3 - test_str.size() % 3) % 3;
        const std::size_t actual_padding = encoded.size() - (encoded.find_last_not_of('=') + 1);
        BOOST_CHECK_EQUAL(actual_padding, expected_padding);
    }
}

// Encodes a string, rewrites the encoded text to the URL-safe alphabet
// ('+' -> '-', '/' -> '_') and expects decodeBase64 to restore the input.
BOOST_AUTO_TEST_CASE(roundtrip_with_url_safe_chars)
{
    using namespace osrm::engine;

    const std::string original = "Hello+World/Nothing?Is:Impossible";
    std::string url_safe = encodeBase64(original);

    // NOTE(review): the '+' and '/' in `original` are input bytes, not Base64
    // output. The encoding of this particular input appears to contain
    // neither character, in which case this substitution changes nothing and
    // the URL-safe path is not actually exercised - confirm.
    for (char &symbol : url_safe)
    {
        if (symbol == '+')
        {
            symbol = '-';
        }
        else if (symbol == '/')
        {
            symbol = '_';
        }
    }

    BOOST_CHECK_EQUAL(decodeBase64(url_safe), original);
}

// Round-trips a 1000-byte buffer whose contents cycle through i % 256, so
// every possible byte value is covered.
BOOST_AUTO_TEST_CASE(roundtrip_stress_test)
{
    using namespace osrm::engine;

    std::string payload(1000, '\0');
    for (std::size_t pos = 0; pos < payload.size(); ++pos)
    {
        payload[pos] = static_cast<char>(pos % 256);
    }

    const std::string encoded = encodeBase64(payload);
    const std::string decoded = decodeBase64(encoded);
    BOOST_CHECK_EQUAL(decoded, payload);
}

BOOST_AUTO_TEST_SUITE_END()