diff --git a/include/ext/meta/concepts.hpp b/include/ext/meta/concepts.hpp
new file mode 100644
index 0000000..269a157
--- /dev/null
+++ b/include/ext/meta/concepts.hpp
@@ -0,0 +1,21 @@
+// Copyright - 2020 - Jan Christoph Uhde
+#ifndef EXT_META_CONCEPTS_HEADER
+#define EXT_META_CONCEPTS_HEADER
+#include <concepts>
+
+namespace ext::meta {
+
+// Satisfied by containers that expose `key_type` and `mapped_type` and whose
+// begin()/end() return the container's `iterator` type.
+// NOTE(review): the argument of std::same_as was lost in this copy of the
+// patch; `typename Container::iterator` is a reconstruction - confirm.
+template <typename Container>
+concept Associative = requires(Container cont) {
+    typename Container::key_type;
+    typename Container::mapped_type;
+    { cont.begin() } -> std::same_as<typename Container::iterator>;
+    { cont.end() } -> std::same_as<typename Container::iterator>;
+};
+
+} // namespace ext::meta
+#endif // EXT_META_CONCEPTS_HEADER
diff --git a/include/ext/structures/trie.hpp b/include/ext/structures/trie.hpp
new file mode 100644
index 0000000..f1c80b2
--- /dev/null
+++ b/include/ext/structures/trie.hpp
@@ -0,0 +1,278 @@
+// Copyright - Jan Christoph Uhde
+#ifndef EXT_STRUCTURES_TRIE_HEADER
+#define EXT_STRUCTURES_TRIE_HEADER
+
+// NOTE(review): the include targets were lost in this copy of the patch. The
+// standard headers are reconstructed from what the code uses; the project
+// header is presumed to be the concepts header added by this patch - confirm.
+#include <algorithm>
+#include <cstddef>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <optional>
+#include <stdexcept>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include <ext/meta/concepts.hpp>
+
+namespace ext::structures {
+namespace detail_trie {
+
+// Converts a string into a vector holding its characters.
+inline std::vector<char> str2vec(std::string const& str) {
+    return std::vector<char>(str.begin(), str.end());
+}
+
+// Converts a vector of char-convertible labels into a string.
+template <typename T>
+std::string vec2str(std::vector<T> const& vec) {
+    std::string rv;
+    rv.reserve(vec.size());
+    for (auto const& c : vec) rv.push_back(c);
+    return rv;
+}
+
+// Returns a pointer to the value stored under `key`, or nullptr when the map
+// is not allocated or does not contain the key.
+template <typename Key, typename Value>
+Value* find_or_null(std::unique_ptr<std::map<Key, Value>>& map, Key const& key) {
+    if (map == nullptr)
+        return nullptr;
+    auto it = map->find(key);
+    return it != map->end() ? &(it->second) : nullptr;
+}
+
+// Node in the trie.
+//
+// `prefix` holds the labels stored in this node (for a child: the labels that
+// follow its edge label), `children` is allocated on first use and `value` is
+// an optional payload. `is_word` marks nodes at which an inserted key ends.
+template <typename Label, typename Value>
+class node {
+  public:
+    using value_t = Value;
+    using label_t = Label;
+    using key_t = std::vector<label_t>;
+    using children_t = std::map<label_t, std::unique_ptr<node>>;
+
+    key_t prefix;
+    std::unique_ptr<children_t> children;
+    std::unique_ptr<value_t> value;
+    bool is_word = false;
+
+    // Allocates the child map if it does not exist yet.
+    void ensure_children() {
+        if (children == nullptr)
+            this->children = std::make_unique<children_t>();
+    }
+
+    // Moves `prefix` and `value` into this node and sets the word flag.
+    // Both arguments are left in a moved-from state.
+    void assign_data(key_t& prefix, std::unique_ptr<value_t>& value, bool isw = true) {
+        this->prefix = std::move(prefix);
+        this->is_word = isw;
+        this->value = std::move(value);
+    }
+
+    // Adds a child below `label` and moves `prefix` and `value` into it.
+    // Returns {child, true} on success. When a child with this label already
+    // exists nothing is moved and {this, false} is returned.
+    std::pair<node*, bool>
+    insert_node(label_t const& label, key_t& prefix, std::unique_ptr<value_t>& value, bool is_word) {
+        auto new_node = std::make_unique<node>();
+        ensure_children();
+        auto [it, ok] = this->children->try_emplace(label, std::move(new_node));
+        if (!ok)
+            return {this, false};
+        auto rv = it->second.get();
+        rv->assign_data(prefix, value, is_word);
+        return {rv, true};
+    }
+};
+
+// Description of how a node has to be split for an insert.
+//
+// parent_prefix - labels shared by the node's prefix and the inserted key
+// split_label   - first label of the node's prefix behind the shared part
+// split_prefix  - labels of the node's prefix behind `split_label`
+// insert_label  - first label of the inserted key behind the shared part
+// insert_prefix - labels of the inserted key behind `insert_label`
+template <typename Key>
+struct split {
+    using key_t = Key;
+    using label_t = typename key_t::value_type;
+
+    key_t parent_prefix;
+    std::optional<label_t> split_label;
+    key_t split_prefix;
+    std::optional<label_t> insert_label;
+    key_t insert_prefix;
+};
+
+// Returns the length of the longest common prefix of `a` and `b`.
+template <typename Key>
+[[nodiscard]] constexpr std::size_t find_split_point(Key const& a, Key const& b) {
+    std::size_t const limit = std::min(a.size(), b.size());
+    std::size_t rv = 0;
+    while (rv < limit && a[rv] == b[rv])
+        ++rv;
+    return rv;
+}
+
+// Calculates the split of a node with prefix `parent_prefix` for a key whose
+// remaining labels are `insert_prefix`. A label stays empty when the
+// respective sequence ends at the split point.
+template <typename Key>
+[[nodiscard]] split<Key> split_info(Key const& parent_prefix, Key const& insert_prefix) {
+    auto const sp = static_cast<typename Key::difference_type>(find_split_point(parent_prefix, insert_prefix));
+
+    auto const in_beg = insert_prefix.begin();
+    auto const in_end = insert_prefix.end();
+    auto const in_common_end = in_beg + sp;
+    auto in_rest = in_common_end;
+
+    auto const pa_end = parent_prefix.end();
+    auto pa_rest = parent_prefix.begin() + sp;
+
+    std::optional<typename Key::value_type> split_label;
+    if (pa_rest != pa_end)
+        split_label = *(pa_rest++);
+
+    std::optional<typename Key::value_type> insert_label;
+    if (in_rest != in_end)
+        insert_label = *(in_rest++);
+
+    return split<Key>{
+        .parent_prefix = Key(in_beg, in_common_end),
+        .split_label = std::move(split_label),
+        .split_prefix = Key(pa_rest, pa_end),
+        .insert_label = std::move(insert_label),
+        .insert_prefix = Key(in_rest, in_end),
+    };
+}
+
+} // namespace detail_trie
+
+// Trie whose nodes store label sequences (prefixes) instead of single labels.
+template <typename Label, typename Value>
+class trie {
+  public:
+    using label_t = Label;
+    using value_t = Value;
+
+    using node_t = detail_trie::node<label_t, value_t>;
+    using key_t = typename node_t::key_t;
+    using children_t = typename node_t::children_t;
+
+    // Inserts `key` and attaches `value` to the node at which the key ends.
+    //
+    // Returns {node, true} when the key was added and {node, false} when the
+    // key is already contained (the trie is not modified in that case).
+    // Throws std::logic_error if splitting a node fails unexpectedly.
+    [[nodiscard]] std::pair<node_t*, bool> insert(key_t const& key, std::unique_ptr<value_t> value = nullptr) {
+        using namespace detail_trie;
+
+        auto [insert_parent, insert_prefix] = find_insert_parent(root.get(), key);
+        if (insert_parent == nullptr)
+            return {nullptr, false};
+
+        auto& ip = *insert_parent;
+
+        // an unused node (the root of an empty trie) simply takes the whole key
+        bool const unused = !ip.is_word && ip.prefix.empty() && (ip.children == nullptr || ip.children->empty());
+        if (unused && !insert_prefix.empty()) [[unlikely]] {
+            ip.assign_data(insert_prefix, value);
+            return {insert_parent, true};
+        }
+
+        auto si = split_info(ip.prefix, insert_prefix);
+
+        // create split if necessary
+        if (si.split_label.has_value()) {
+            // the key leaves the prefix of ip before its end: the tail of the
+            // prefix, the value and the children move into a new child node
+            auto split_children = std::move(ip.children);
+            ip.children = std::make_unique<children_t>();
+
+            auto [split_node, ok] = ip.insert_node(si.split_label.value(), si.split_prefix, ip.value, ip.is_word);
+            if (!ok) {
+                ip.children = std::move(split_children); // restore state
+                throw std::logic_error("this insert into the trie must not fail");
+            }
+
+            split_node->children = std::move(split_children);
+            ip.prefix = si.parent_prefix;
+            ip.is_word = false;
+            ip.value = nullptr;
+        }
+
+        // the key ends at ip - no new node is required
+        if (!si.insert_label.has_value()) {
+            if (ip.is_word)
+                return {insert_parent, false};
+            ip.is_word = true;
+            ip.value = std::move(value);
+            return {insert_parent, true};
+        }
+
+        return ip.insert_node(si.insert_label.value(), si.insert_prefix, value, true);
+    }
+
+#ifndef EXT_TEST
+  private:
+#endif
+
+    // Walks from `start` along `key` for as long as the trie matches.
+    //
+    // Returns the last node that was reached and the rest of the key. The rest
+    // starts at the key position that corresponds to the first label of that
+    // node's prefix, so it can be compared directly with the prefix. For a
+    // nullptr `start` or an empty key {start, {}} is returned.
+    [[nodiscard]] static std::pair<node_t*, key_t> find_insert_parent(node_t* start, key_t key) {
+        std::pair<node_t*, key_t> rv(start, {});
+
+        if (start == nullptr || key.empty())
+            return rv;
+
+        auto key_it = key.begin();
+        auto const key_end = key.end();
+        auto node_begin = key_it; // position in key where the prefix of rv.first starts
+
+        for (;;) {
+            node_begin = key_it;
+
+            // first check the prefix
+            // stop if the key to insert is shorter or does not match
+            bool prefix_matches = true;
+            for (auto const& label : rv.first->prefix) {
+                if (key_it == key_end || label != *key_it) {
+                    prefix_matches = false;
+                    break;
+                }
+                ++key_it;
+            }
+            // never dereference key_it when the key is used up
+            if (!prefix_matches || key_it == key_end)
+                break;
+
+            // descend into the matching child or stop
+            auto next = detail_trie::find_or_null(rv.first->children, *key_it);
+            if (next == nullptr)
+                break;
+            rv.first = next->get();
+            ++key_it;
+        }
+
+        std::move(node_begin, key_end, std::back_inserter(rv.second));
+        return rv;
+    }
+
+    // member vars
+    std::unique_ptr<node_t> root = std::make_unique<node_t>();
+};
+
+} // namespace ext::structures
+#endif
diff --git a/include_files.cmake b/include_files.cmake
index c519e85..e7c2cd8 100644
--- a/include_files.cmake
+++ b/include_files.cmake
@@ -7,8 +7,10 @@ set(ext-basics-header
     "include/ext/memory/tagged_pointer.hpp"
     "include/ext/meta/basic.hpp"
     "include/ext/meta/basic_old.hpp"
+    "include/ext/meta/concepts.hpp"
     "include/ext/structures/binary_index_tree.hpp"
     "include/ext/structures/lru_cache.hpp"
+    "include/ext/structures/trie.hpp"
     "include/ext/util/basic.hpp"
     "include/ext/util/bit_tricks.hpp"
     "include/ext/util/cast.hpp"
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index f932532..87643fe 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -21,6 +21,7 @@ set(test-files
 
     "structures_binary_index_tree"
     "structures_lru_cache"
+    "structures_trie"
 
     "util_basic"
     "util_cast"
diff --git a/tests/structures_trie.cpp b/tests/structures_trie.cpp
new file mode 100644
index 0000000..246722c
--- /dev/null
+++ b/tests/structures_trie.cpp
@@ -0,0 +1,246 @@
+// Copyright - 2023 - Jan Christoph Uhde
+// Please see LICENSE.md for license or visit https://github.com/extcpp/basics
+// NOTE(review): the include targets and the template arguments in this file
+// were lost in this copy of the patch and have been reconstructed - confirm.
+#include <gtest/gtest.h>
+#include <optional>
+#include <string>
+#include <vector>
+
+#define EXT_TEST
+#include <ext/structures/trie.hpp>
+
+// Inserts the characters of `s` as key without a value.
+[[nodiscard]] auto insert(auto& t, std::string s) {
+    using namespace ext::structures::detail_trie;
+    return t.insert(str2vec(s));
+}
+
+// Returns the child of node `n` below label `c`, or nullptr if there is none.
+[[nodiscard]] auto get_child(auto& n, char c) {
+    using namespace ext::structures::detail_trie;
+    decltype(n->children->begin()->second.get()) rv = nullptr;
+
+    if (n->children == nullptr)
+        return rv;
+
+    auto* map = n->children.get();
+    auto it = map->find(c);
+    if (it != map->end())
+        if (it->second != nullptr)
+            rv = it->second.get();
+
+    return rv;
+}
+
+// Returns the prefix stored in node `n` as string.
+[[nodiscard]] auto get_prefix(auto& n) {
+    using namespace ext::structures::detail_trie;
+    return vec2str(n->prefix);
+}
+
+// Returns the contained character as string, or "" for an empty optional.
+[[nodiscard]] auto opt2str(std::optional<char>& opt) {
+    std::string rv;
+    if (opt.has_value())
+        rv.push_back(opt.value());
+    return rv;
+}
+
+// Renders every member of a split into one string, so that a single
+// comparison covers the whole split and a failure shows all members.
+[[nodiscard]] std::string split2str(ext::structures::detail_trie::split<std::vector<char>>& s) {
+    using namespace ext::structures::detail_trie;
+    return "parent_prefix=" + vec2str(s.parent_prefix) + " split_label=" + opt2str(s.split_label) +
+           " split_prefix=" + vec2str(s.split_prefix) + " insert_label=" + opt2str(s.insert_label) +
+           " insert_prefix=" + vec2str(s.insert_prefix);
+}
+
+TEST(structures_trie, split_point) {
+    using namespace ext::structures::detail_trie;
+    {
+        auto actual = find_split_point(str2vec("123aa"), str2vec("123b"));
+        EXPECT_EQ(actual, 3u);
+    }
+    {
+        auto actual = find_split_point(str2vec("a"), str2vec(""));
+        EXPECT_EQ(actual, 0u);
+    }
+    {
+        auto actual = find_split_point(str2vec(""), str2vec("a"));
+        EXPECT_EQ(actual, 0u);
+    }
+    {
+        auto actual = find_split_point(str2vec("a"), str2vec("b"));
+        EXPECT_EQ(actual, 0u);
+    }
+}
+
+TEST(structures_trie, splits) {
+    using namespace ext::structures::detail_trie;
+    using split_t = split<std::vector<char>>;
+    {
+        auto actual = split_info(str2vec("123aa"), str2vec("123b"));
+        auto expected = split_t{
+            .parent_prefix = str2vec("123"),
+            .split_label = 'a',
+            .split_prefix = str2vec("a"),
+            .insert_label = 'b',
+            .insert_prefix = str2vec(""),
+        };
+        EXPECT_EQ(split2str(expected), split2str(actual));
+    }
+    {
+        auto actual = split_info(str2vec(""), str2vec(""));
+        auto expected = split_t{};
+        EXPECT_EQ(split2str(expected), split2str(actual));
+    }
+    {
+        auto actual = split_info(str2vec(""), str2vec("a"));
+        auto expected = split_t{
+            .insert_label = 'a',
+        };
+        EXPECT_EQ(split2str(expected), split2str(actual));
+    }
+    {
+        auto actual = split_info(str2vec("a"), str2vec(""));
+        auto expected = split_t{
+            .split_label = 'a',
+        };
+        EXPECT_EQ(split2str(expected), split2str(actual));
+    }
+    {
+        auto actual = split_info(str2vec("a"), str2vec("b"));
+        auto expected = split_t{
+            .split_label = 'a',
+            .insert_label = 'b',
+        };
+        EXPECT_EQ(split2str(expected), split2str(actual));
+    }
+}
+
+TEST(structures_trie, simple_inserts) {
+    using namespace ext::structures::detail_trie;
+
+    // NOTE(review): value type chosen for the reconstruction - confirm
+    ext::structures::trie<char, int> trie;
+    auto root = trie.root.get();
+
+    {
+        auto [ip, ok] = insert(trie, "aaacc");
+        ASSERT_TRUE(ok);
+        ASSERT_EQ(root, ip);
+        // root [aaacc]
+        ASSERT_EQ(get_prefix(root), "aaacc");
+    }
+    {
+        auto [_, ok] = trie.insert(str2vec("aaab"));
+        ASSERT_TRUE(ok);
+        // root(aaa)
+        //  c/   \b
+        // n[c]  n[]
+        ASSERT_EQ(get_prefix(root), "aaa");
+
+        auto aaacc = get_child(root, 'c');
+        ASSERT_NE(aaacc, nullptr);
+        ASSERT_EQ(get_prefix(aaacc), "c");
+
+        auto aaab = get_child(root, 'b');
+        ASSERT_NE(aaab, nullptr);
+        ASSERT_EQ(get_prefix(aaab), "");
+    }
+    {
+        auto [insert_node, ok] = insert(trie, "bab");
+        ASSERT_TRUE(ok);
+        //       root()
+        //      a/   \b
+        //   n(aa)   n[ab]
+        //  c/   \b
+        // n[c]  n[]
+        //
+        EXPECT_EQ(get_prefix(root), "");
+        {
+            auto aaa = get_child(root, 'a');
+            ASSERT_NE(aaa, nullptr);
+            EXPECT_EQ(get_prefix(aaa), "aa");
+            {
+                {
+                    auto aaacc = get_child(aaa, 'c');
+                    ASSERT_NE(aaacc, nullptr);
+                    EXPECT_EQ(get_prefix(aaacc), "c");
+                }
+                {
+                    auto aaab = get_child(aaa, 'b');
+                    ASSERT_NE(aaab, nullptr);
+                    EXPECT_EQ(get_prefix(aaab), "");
+                }
+            }
+        }
+        {
+            auto bab = get_child(root, 'b');
+            ASSERT_NE(bab, nullptr);
+            EXPECT_EQ(bab, insert_node);
+            EXPECT_EQ(get_prefix(bab), "ab");
+        }
+    }
+}
+
+TEST(structures_trie, duplicate_and_prefix_inserts) {
+    ext::structures::trie<char, int> trie;
+    auto root = trie.root.get();
+
+    {
+        auto [ip, ok] = insert(trie, "aaacc");
+        ASSERT_TRUE(ok);
+        ASSERT_EQ(root, ip);
+    }
+    {
+        // inserting a contained key is rejected and changes nothing
+        auto [ip, ok] = insert(trie, "aaacc");
+        EXPECT_FALSE(ok);
+        EXPECT_EQ(root, ip);
+        EXPECT_EQ(get_prefix(root), "aaacc");
+    }
+    {
+        // a key that extends a completely matched prefix becomes a child
+        // root[aaacc]
+        //   d|
+        //   n[]
+        auto [ip, ok] = insert(trie, "aaaccd");
+        ASSERT_TRUE(ok);
+        EXPECT_EQ(get_prefix(root), "aaacc");
+        EXPECT_TRUE(root->is_word);
+
+        auto aaaccd = get_child(root, 'd');
+        ASSERT_NE(aaaccd, nullptr);
+        EXPECT_EQ(aaaccd, ip);
+        EXPECT_EQ(get_prefix(aaaccd), "");
+    }
+    {
+        // a key that ends inside a prefix splits the node and marks it as word
+        // root[aa]
+        //   a|
+        //  n[cc]
+        //   d|
+        //   n[]
+        auto [ip, ok] = insert(trie, "aa");
+        ASSERT_TRUE(ok);
+        EXPECT_EQ(root, ip);
+        EXPECT_EQ(get_prefix(root), "aa");
+        EXPECT_TRUE(root->is_word);
+
+        auto aaacc = get_child(root, 'a');
+        ASSERT_NE(aaacc, nullptr);
+        EXPECT_EQ(get_prefix(aaacc), "cc");
+        EXPECT_TRUE(aaacc->is_word);
+
+        auto aaaccd = get_child(aaacc, 'd');
+        ASSERT_NE(aaaccd, nullptr);
+        EXPECT_TRUE(aaaccd->is_word);
+    }
+    {
+        // both older keys are still contained
+        EXPECT_FALSE(insert(trie, "aaacc").second);
+        EXPECT_FALSE(insert(trie, "aaaccd").second);
+    }
+}