Skip to content

Commit

Permalink
perf: replace rbtree with vector of pair (rime#684)
Browse files Browse the repository at this point in the history
  • Loading branch information
eagleoflqj authored and graphemecluster committed Aug 7, 2023
1 parent e3f626b commit 7d8f7ce
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 4 deletions.
10 changes: 8 additions & 2 deletions src/rime/dict/entry_collector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@
//
// 2011-11-27 GONG Chen <chen.sst@gmail.com>
//
#include <algorithm>
#include <fstream>
#include <utility>
#include <boost/algorithm/string.hpp>
#include <boost/lexical_cast.hpp>
#include <rime/algo/strings.h>
Expand Down Expand Up @@ -192,12 +194,14 @@ void EntryCollector::CreateEntry(const string& word,
bool is_word = (e->raw_code.size() == 1);
if (is_word) {
auto& weights = words[e->text];
if (weights.find(code_str) != weights.end()) {
if (std::find_if(weights.begin(), weights.end(), [&](const auto& p) {
return p.first == code_str;
}) != weights.end()) {
LOG(WARNING) << "duplicate word definition '" << e->text << "': ["
<< code_str << "].";
return;
}
weights[code_str] += e->weight;
weights.push_back(std::make_pair(code_str, e->weight));
total_weight[e->text] += e->weight;
}
entries.emplace_back(std::move(e));
Expand All @@ -214,6 +218,8 @@ bool EntryCollector::TranslateWord(const string& word, vector<string>* result) {
}
const auto& w = words.find(word);
if (w != words.end()) {
std::sort(w->second.begin(), w->second.end(),
[](const auto& a, const auto& b) { return a.first < b.first; });
for (const auto& v : w->second) {
const double kMinimalWeight = 0.05; // 5%
double min_weight = total_weight[word] * kMinimalWeight;
Expand Down
6 changes: 4 additions & 2 deletions src/rime/dict/entry_collector.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ struct RawDictEntry {

// code -> weight
using WeightMap = map<string, double>;
// word -> { code -> weight }
using WordMap = hash_map<string, WeightMap>;
// word -> [ { code, weight } ]
// A flat vector is used instead of word -> { code -> weight } to save memory:
// there may be many words, but each word is unlikely to have many
// representations.
using WordMap = hash_map<string, vector<pair<string, double>>>;
// [ (word, weight), ... ]
using EncodeQueue = std::queue<pair<string, string>>;

Expand Down

0 comments on commit 7d8f7ce

Please sign in to comment.