Skip to content

Commit

Permalink
Improved the tokenizer
Browse files Browse the repository at this point in the history
Now uses string_view, supports a max split count, and has unit tests.
  • Loading branch information
mmatyas committed Oct 7, 2024
1 parent ac20dd4 commit 19cbe6d
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 10 deletions.
8 changes: 4 additions & 4 deletions src/common/FileList.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void UpdateMusicWithOverrides(MusicList& musiclist, WorldMusicList& worldmusicli
if (currentCategory == Category::None)
continue;

std::list<std::string> tokens = tokenize(line, ',');
std::list<std::string_view> tokens = tokenize(line, ',');
if (tokens.empty())
continue;

Expand All @@ -80,8 +80,8 @@ void UpdateMusicWithOverrides(MusicList& musiclist, WorldMusicList& worldmusicli
override.mapname = std::move(tokens.front());
tokens.pop_front();

for (const std::string& token : tokens) {
std::string path = convertPath(token);
for (const std::string_view token : tokens) {
std::string path = convertPath(std::string(token));
if (FileExists(path))
override.songs.emplace_back(std::move(path));
}
Expand All @@ -95,7 +95,7 @@ void UpdateMusicWithOverrides(MusicList& musiclist, WorldMusicList& worldmusicli
override.worldname = std::move(tokens.front());
tokens.pop_front();

std::string path = convertPath(tokens.front());
std::string path = convertPath(std::string(tokens.front()));
if (FileExists(path)) {
override.song = std::move(path);
worldmusicoverrides.emplace_back(std::move(override));
Expand Down
28 changes: 23 additions & 5 deletions src/common/linfunc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,31 @@ bool cstr_ci_equals(const char* const a, const char* const b)
return true;
}

// NOTE(review): this listing comes from a rendered diff, so the next line and
// the istringstream/getline lines further down appear to be the removed
// pre-commit implementation shown next to its replacement — confirm against
// the actual file before relying on them.
std::list<std::string> tokenize(const std::string& text, char delim)
// Splits `text` at occurrences of `delim`, performing at most `maxsplit`
// splits, and returns the pieces in their original order.
//
// - The returned views are substrings of `text`; they do not own their data.
// - An empty `text` yields a list holding one empty token.
// - Once `maxsplit` splits have been made, the whole remainder (further
//   delimiters included) becomes the final token.
// - A delimiter at the very end of `text` does not add a trailing empty token,
//   because the loop stops as soon as `start` reaches `text.size()`.
std::list<std::string_view> tokenize(std::string_view text, char delim, size_t maxsplit)
{
// NOTE(review): old container declaration (see note at the top).
std::list<std::string> tokens;
// Empty input: return the (empty) input itself as the only token.
if (text.empty()) {
return { text };
}

std::list<std::string_view> tokens;
// Index of the first character of the token currently being scanned.
size_t start = 0;

while (start < text.size()) {
// Split budget exhausted: everything that is left is one token.
if (maxsplit == 0) {
tokens.emplace_back(text.substr(start));
break;
}

// NOTE(review): old stream-based splitting loop (see note at the top).
std::istringstream stream(text);
for (std::string part; std::getline(stream, part, delim);)
tokens.emplace_back(part);
const size_t end = text.find(delim, start);
// No further delimiter: the rest of the text is the last token.
if (end == std::string_view::npos) {
tokens.emplace_back(text.substr(start));
break;
}

// Emit the token before the delimiter, then continue right after it.
tokens.emplace_back(text.substr(start, end - start));
start = end + 1;
maxsplit--;
}

return tokens;
}
3 changes: 2 additions & 1 deletion src/common/linfunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@

#include <list>
#include <string>
#include <string_view>


char *inPlaceLowerCase(char *str);
void inPlaceLowerCase(std::string& str);
bool cstr_ci_equals(const char* const a, const char* const b);

std::list<std::string> tokenize(const std::string& text, char delim);
// Splits `text` at occurrences of `delim` and returns the pieces in order.
// `maxsplit` limits the number of splits performed; the default of -1 converts
// to the largest size_t value, which effectively means "no limit".
// The returned string_views have std::string_view's usual non-owning
// semantics, so the caller must keep the underlying character data alive
// for as long as the tokens are used.
std::list<std::string_view> tokenize(std::string_view text, char delim, size_t maxsplit = -1);


//_DEBUG
Expand Down
1 change: 1 addition & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ endfunction()

# Unit test registrations, one per call.
# NOTE(review): the arguments look like <target name> <source file>; confirm
# against the definition of smw_create_test.
smw_create_test(test_vec2 common/math/test_vec2.cpp)
smw_create_test(test_version common/test_version.cpp)
smw_create_test(test_tokenize common/test_tokenize.cpp)
51 changes: 51 additions & 0 deletions tests/common/test_tokenize.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#define DOCTEST_CONFIG_IMPLEMENT_WITH_MAIN
#include "doctest.h"

#include "linfunc.h"


// Tokenizing an empty input must produce exactly one token equal to the input.
TEST_CASE("Empty string") {
    const std::string input;
    const auto tokens = tokenize(input, ',');

    CHECK(tokens.size() == 1);
    CHECK(tokens.front() == input);
}

// Input without any delimiter comes back unchanged as the single token.
TEST_CASE("No delimiter") {
    const std::string input{"textwithoutdelimiter"};
    const auto tokens = tokenize(input, ',');

    CHECK(tokens.size() == 1);
    CHECK(tokens.front() == input);
}

// Exercises the split limit on an input containing two delimiters.
TEST_CASE("One or more delimiters") {
    using Tokens = std::list<std::string_view>;

    const std::string input{"apple,banana,cherry"};

    // A limit of zero performs no split at all.
    SUBCASE("Zero") {
        const Tokens wanted = {"apple,banana,cherry"};
        const Tokens actual = tokenize(input, ',', 0);
        CHECK(actual.size() == wanted.size());
        CHECK(actual == wanted);
    }

    // After one split the remainder keeps its delimiter.
    SUBCASE("One split") {
        const Tokens wanted = {"apple", "banana,cherry"};
        const Tokens actual = tokenize(input, ',', 1);
        CHECK(actual.size() == wanted.size());
        CHECK(actual == wanted);
    }

    // Two splits are enough to separate every item.
    SUBCASE("Two splits") {
        const Tokens wanted = {"apple", "banana", "cherry"};
        const Tokens actual = tokenize(input, ',', 2);
        CHECK(actual.size() == wanted.size());
        CHECK(actual == wanted);
    }

    // Omitting the limit splits at every delimiter.
    SUBCASE("Unlimited") {
        const Tokens wanted = {"apple", "banana", "cherry"};
        const Tokens actual = tokenize(input, ',');
        CHECK(actual.size() == wanted.size());
        CHECK(actual == wanted);
    }
}

0 comments on commit 19cbe6d

Please sign in to comment.