diff --git a/src/rime/algo/encoder.cc b/src/rime/algo/encoder.cc index 478b052610..15064a9271 100644 --- a/src/rime/algo/encoder.cc +++ b/src/rime/algo/encoder.cc @@ -8,6 +8,7 @@ #include #include #include +#include namespace rime { @@ -15,14 +16,12 @@ static const int kEncoderDfsLimit = 32; static const int kMaxPhraseLength = 32; string RawCode::ToString() const { - return boost::join(*this, " "); + return strings::join(*this, " "); } void RawCode::FromString(const string &code_str) { - boost::split(*dynamic_cast *>(this), - code_str, - boost::algorithm::is_space(), - boost::algorithm::token_compress_on); + *dynamic_cast *>(this) = + strings::split(code_str, " "); } TableEncoder::TableEncoder(PhraseCollector* collector) diff --git a/src/rime/algo/strings.cc b/src/rime/algo/strings.cc new file mode 100644 index 0000000000..801e9e43c3 --- /dev/null +++ b/src/rime/algo/strings.cc @@ -0,0 +1,37 @@ +#include + +namespace rime { +namespace strings { + +vector split(const string& str, const string& delim, SplitBehavior behavior) { + vector strings; + size_t lastPos, pos; + if (behavior == SplitBehavior::SkipEmpty) { + lastPos = str.find_first_not_of(delim, 0); + } else { + lastPos = 0; + } + pos = str.find_first_of(delim, lastPos); + + while (std::string::npos != pos || std::string::npos != lastPos) { + strings.emplace_back(str.substr(lastPos, pos - lastPos)); + if (behavior == SplitBehavior::SkipEmpty) { + lastPos = str.find_first_not_of(delim, pos); + } else { + if (pos == std::string::npos) { + break; + } + lastPos = pos + 1; + } + pos = str.find_first_of(delim, lastPos); + } + return strings; +}; + +vector split(const string& str, const string& delim) { + return split(str, delim, SplitBehavior::SkipEmpty); +}; + +} // namespace strings +} // namespace rime + diff --git a/src/rime/algo/strings.h b/src/rime/algo/strings.h new file mode 100644 index 0000000000..ba4f262f73 --- /dev/null +++ b/src/rime/algo/strings.h @@ -0,0 +1,43 @@ +#ifndef RIME_STRINGS_H_ +#define RIME_STRINGS_H_ + +#include +#include + +namespace rime { +namespace strings { + +enum class SplitBehavior { KeepEmpty, SkipEmpty }; + +vector split(const string& str, const string& delim, SplitBehavior behavior); + +vector split(const string& str, const string& delim); + +template +string join(Iter start, Iter end, T &&delim) { + string result; + if (start != end) { + result += (*start); + start++; + } + for (; start != end; start++) { + result += (delim); + result += (*start); + } + return result; +} + +template +inline string join(C &&container, T &&delim) { + return join(std::begin(container), std::end(container), delim); +} + +template +inline string join(std::initializer_list &&container, T &&delim) { + return join(std::begin(container), std::end(container), delim); +} + +} // namespace strings +} // namespace rime + +#endif // RIME_STRINGS_H_ diff --git a/src/rime/algo/utilities.cc b/src/rime/algo/utilities.cc index 3ccb1fcedb..d962837120 100644 --- a/src/rime/algo/utilities.cc +++ b/src/rime/algo/utilities.cc @@ -5,6 +5,7 @@ // 2013-01-30 GONG Chen // #include +#include #include #include @@ -35,8 +36,9 @@ ChecksumComputer::ChecksumComputer(uint32_t initial_remainder) void ChecksumComputer::ProcessFile(const string& file_name) { std::ifstream fin(file_name.c_str()); - string file_content((std::istreambuf_iterator(fin)), - std::istreambuf_iterator()); + std::stringstream buffer; + buffer << fin.rdbuf(); + const auto& file_content(buffer.str()); crc_.process_bytes(file_content.data(), file_content.length()); } diff --git a/src/rime/dict/entry_collector.cc b/src/rime/dict/entry_collector.cc index f3232c470d..455a4e0e5b 100644 --- a/src/rime/dict/entry_collector.cc +++ b/src/rime/dict/entry_collector.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -86,9 +87,7 @@ void EntryCollector::Collect(const string& dict_file) { continue; } // read a dict entry - vector row; - boost::algorithm::split(row, line, - boost::algorithm::is_any_of("\t")); + auto row = strings::split(line, "\t"); int num_columns = static_cast(row.size()); if (num_columns <= text_column || row[text_column].empty()) { LOG(WARNING) << "Missing entry text at #" << num_entries << "."; @@ -165,7 +164,7 @@ void EntryCollector::CreateEntry(const string &word, if (scaled) { double percentage = 100.0; try { - percentage = boost::lexical_cast( + percentage = std::stod( weight_str.substr(0, weight_str.length() - 1)); } catch (...) { @@ -176,7 +175,7 @@ void EntryCollector::CreateEntry(const string &word, } else if (!weight_str.empty()) { // absolute weight try { - e.weight = boost::lexical_cast(weight_str); + e.weight = std::stod(weight_str); } catch (...) { LOG(WARNING) << "invalid entry definition at #" << num_entries << "."; @@ -212,16 +211,16 @@ void EntryCollector::CreateEntry(const string &word, bool EntryCollector::TranslateWord(const string& word, vector* result) { - ReverseLookupTable::const_iterator s = stems.find(word); + const auto& s = stems.find(word); if (s != stems.end()) { for (const string& stem : s->second) { result->push_back(stem); } return true; } - WordMap::const_iterator w = words.find(word); + const auto& w = words.find(word); if (w != words.end()) { - for (const auto& v : w->second) { + for (const auto& v : w->second) { const double kMinimalWeight = 0.05; // 5% double min_weight = total_weight[word] * kMinimalWeight; if (v.second < min_weight) diff --git a/src/rime/dict/entry_collector.h b/src/rime/dict/entry_collector.h index 77bc7ae2ee..d7842c6b53 100644 --- a/src/rime/dict/entry_collector.h +++ b/src/rime/dict/entry_collector.h @@ -24,7 +24,7 @@ struct RawDictEntry { // code -> weight using WeightMap = map; // word -> { code -> weight } -using WordMap = map; +using WordMap = hash_map; // [ (word, weight), ... ] using EncodeQueue = std::queue>; diff --git a/src/rime/dict/vocabulary.h b/src/rime/dict/vocabulary.h index 563622f09d..28afabd5bf 100644 --- a/src/rime/dict/vocabulary.h +++ b/src/rime/dict/vocabulary.h @@ -91,7 +91,7 @@ class Vocabulary : public map { }; // word -> { code, ... } -using ReverseLookupTable = map>; +using ReverseLookupTable = hash_map>; } // namespace rime