持久 Trie |第 1 集(简介)
原文:https://www . geesforgeks . org/persistent-trie-set-1-introduction/
先决条件:
- 排序
- 数据结构的持久性
Trie 是一种方便的数据结构,在执行多个字符串查找时经常用到。在这篇文章中,我们将在这个数据结构中引入持久性的概念。坚持仅仅意味着保留改变。但是很明显,保留更改会导致额外的内存消耗,从而影响时间复杂度。
我们的目标是在 trie 中应用持久性,并确保它不会超过标准的 Trie 搜索,即O(key _ length)。我们还将分析持久性导致的超过 Trie 标准空间复杂性的额外空间复杂性。
让我们从版本的角度来考虑,也就是说,对于我们的 Trie 中的每个更改/插入,我们都会创建一个新版本。 我们将考虑我们的初始版本为版本-0。现在,当我们在 trie 中进行任何插入时,我们将为它创建一个新版本,并以类似的方式跟踪所有版本的记录。
但是每次为每个版本创建整个 trie 会使内存加倍,并严重影响空间复杂度。所以,这个想法对于大量版本来说很容易耗尽内存。
让我们利用这样一个事实:对于 trie 中的每个新插入,将访问/修改确切的 X (length_of_key)节点。所以,我们的新版本将只包含这些 X 新节点,其余三个节点将与上一版本相同。因此,很明显,对于每个新版本,我们只需要创建这些 X 新节点,而其余的 trie 节点可以从以前的版本中共享。
考虑下图以获得更好的可视化效果:
现在,问题来了:如何跟踪所有版本? 我们只需要跟踪所有版本的第一个根节点,这将用于跟踪不同版本中所有新创建的节点,因为根节点为我们提供了特定版本的入口点。为此,我们可以为所有版本维护一个指向 trie 根节点的指针数组。
我们来考虑一下下面的场景,看看如何使用 Persistent Trie 来解决! 给定一个字符串数组,我们需要确定一个字符串是否存在于数组中的某个 范围【l,r】内。举个例子,假设数组是字典中第 1 页的单词列表 (I 是数组的索引) 我们需要确定给定的单词 X 是否存在于页面范围[l,r]中?
以下是上述问题的实现:-
C++
// C++ implementation of the approach
#include <bits/stdc++.h>
using namespace std;
// Distinct numbers of chars in key
const int sz = 26;
// Persistent Trie node structure
struct PersistentTrie {
// Stores all children nodes, where ith children denotes
// ith alphabetical character
vector<PersistentTrie*> children;
// Marks the ending of the key
bool keyEnd = false;
// Constructor 1
PersistentTrie(bool keyEnd = false)
{
this->keyEnd = keyEnd;
}
// Constructor 2
PersistentTrie(vector<PersistentTrie*>& children, bool keyEnd = false)
{
this->children = children;
this->keyEnd = keyEnd;
}
// detects existence of key in trie
bool findKey(string& key, int len);
// Inserts key into trie
// returns new node after insertion
PersistentTrie* insert(string& key, int len);
};
// Dummy PersistentTrie node
PersistentTrie* dummy;
// Initialize dummy for easy implementation
void init()
{
dummy = new PersistentTrie(true);
// All children of dummy as dummy
vector<PersistentTrie*> children(sz, dummy);
dummy->children = children;
}
// Inserts key into current trie
// returns newly created trie node after insertion
PersistentTrie* PersistentTrie::insert(string& key, int len)
{
// If reached the end of key string
if (len == key.length()) {
// Create new trie node with current trie node
// marked as keyEnd
return new PersistentTrie((*this).children, true);
}
// Fetch current child nodes
vector<PersistentTrie*> new_version_PersistentTrie = (*this).children;
// Insert at key[len] child and
// update the new child node
PersistentTrie* tmpNode = new_version_PersistentTrie[key[len] - 'a'];
new_version_PersistentTrie[key[len] - 'a'] = tmpNode->insert(key, len + 1);
// Return a new node with modified key[len] child node
return new PersistentTrie(new_version_PersistentTrie);
}
// Returns the presence of key in current trie
bool PersistentTrie::findKey(string& key, int len)
{
// If reached end of key
if (key.length() == len)
// Return if this is a keyEnd in trie
return this->keyEnd;
// If we cannot find key[len] child in trie
// we say key doesn't exist in the trie
if (this->children[key[len] - 'a'] == dummy)
return false;
// Recursively search the rest of
// key length in children[key] trie
return this->children[key[len] - 'a']->findKey(key, len + 1);
}
// dfs traversal over the current trie
// prints all the keys present in the current trie
void printAllKeysInTrie(PersistentTrie* root, string& s)
{
int flag = 0;
for (int i = 0; i < sz; i++) {
if (root->children[i] != dummy) {
flag = 1;
s.push_back('a' + i);
printAllKeysInTrie(root->children[i], s);
s.pop_back();
}
}
if (flag == 0 and s.length() > 0)
cout << s << endl;
}
// Driver code
int main(int argc, char const* argv[])
{
// Initialize the PersistentTrie
init();
// Input keys
vector<string> keys({ "goku", "gohan", "goten", "gogeta" });
// Cache to store trie entry roots after each insertion
PersistentTrie* root[keys.size()];
// Marking first root as dummy
root[0] = dummy;
// Inserting all keys
for (int i = 1; i <= keys.size(); i++) {
// Caching new root for ith version of trie
root[i] = root[i - 1]->insert(keys[i - 1], 0);
}
int idx = 3;
cout << "All keys in trie after version - " << idx << endl;
string key = "";
printAllKeysInTrie(root[idx], key);
string queryString = "goku";
int l = 2, r = 3;
cout << "range : "
<< "[" << l << ", " << r << "]" << endl;
if (root[r]->findKey(queryString, 0) and !root[l - 1]->findKey(queryString, 0))
cout << queryString << " - exists in above range" << endl;
else
cout << queryString << " - does not exist in above range" << endl;
queryString = "goten";
l = 2, r = 4;
cout << "range : "
<< "[" << l << ", " << r << "]" << endl;
if (root[r]->findKey(queryString, 0) and !root[l - 1]->findKey(queryString, 0))
cout << queryString << " - exists in above range" << endl;
else
cout << queryString << " - does not exist in above range" << endl;
return 0;
}
Java 语言(一种计算机语言,尤用于创建网站)
// Java program for the above approach
import java.io.*;
import java.util.*;
// Persistent Trie node structure
class PersistentTrie
{
// Stores all children nodes, where
// ith children denotes ith
// alphabetical character
PersistentTrie[] children;
// Marks the ending of the key
boolean keyEnd = false;
// Constructor 1
PersistentTrie(boolean keyEnd)
{
this.keyEnd = keyEnd;
}
// Constructor 2
PersistentTrie(PersistentTrie[] children,
boolean keyEnd)
{
this.children = children;
this.keyEnd = keyEnd;
}
// Detects existence of key in trie
boolean findKey(String key, int len,
PersistentTrie dummy)
{
// If reached end of key
if (key.length() == len)
// Return if this is a keyEnd in trie
return this.keyEnd;
// If we cannot find key[len] child in trie
// we say key doesn't exist in the trie
if (this.children[key.charAt(len) - 'a'] == dummy)
return false;
// Recursively search the rest of
// key length in children[key] trie
return this.children[key.charAt(len) - 'a'].findKey(
key, len + 1, dummy);
}
// Inserts key into trie
// returns new node after insertion
PersistentTrie insert(String key, int len)
{
// If reached the end of key string
if (len == key.length())
{
// Create new trie node with current trie node
// marked as keyEnd
return new PersistentTrie(this.children.clone(),
true);
}
// Fetch current child nodes
PersistentTrie[] new_version_PersistentTrie
= this.children.clone();
// Insert at key[len] child and
// update the new child node
PersistentTrie tmpNode
= new_version_PersistentTrie[key.charAt(len)
- 'a'];
new_version_PersistentTrie[key.charAt(len) - 'a']
= tmpNode.insert(key, len + 1);
// Return a new node with modified key[len] child
// node
return new PersistentTrie(
new_version_PersistentTrie, false);
}
}
class GFG{
static final int sz = 26;
// Dummy PersistentTrie node
static PersistentTrie dummy;
// Initialize dummy for easy implementation
static void init()
{
dummy = new PersistentTrie(false);
// All children of dummy as dummy
PersistentTrie[] children = new PersistentTrie[sz];
for(int i = 0; i < sz; i++)
children[i] = dummy;
dummy.children = children;
}
// dfs traversal over the current trie
// prints all the keys present in the current trie
static void printAllKeysInTrie(PersistentTrie root,
String s)
{
int flag = 0;
for(int i = 0; i < sz; i++)
{
if (root.children[i] != dummy)
{
flag = 1;
printAllKeysInTrie(root.children[i],
s + ((char)('a' + i)));
}
if (root.children[i].keyEnd)
System.out.println(s + (char)('a' + i));
}
}
// Driver code
public static void main(String[] args)
{
// Initialize the PersistentTrie
init();
// Input keys
List<String> keys = Arrays.asList(new String[]{
"goku", "gohan", "goten", "gogeta" });
// Cache to store trie entry roots after each
// insertion
PersistentTrie[] root
= new PersistentTrie[keys.size() + 1];
// Marking first root as dummy
root[0] = dummy;
// Inserting all keys
for(int i = 1; i <= keys.size(); i++)
{
// Caching new root for ith version of trie
root[i]
= root[i - 1].insert(keys.get(i - 1), 0);
}
int idx = 3;
System.out.println("All keys in trie " +
"after version - " + idx);
String key = "";
printAllKeysInTrie(root[3], key);
String queryString = "goku";
int l = 2, r = 3;
System.out.println("range : " + "[" + l +
", " + r + "]");
if (root[r].findKey(queryString, 0, dummy) &&
!root[l - 1].findKey(queryString, 0, dummy))
System.out.println(queryString +
" - exists in above range");
else
System.out.println(queryString +
" - does not exist in " +
"above range");
queryString = "goten";
l = 2;
r = 4;
System.out.println("range : " + "[" + l +
", " + r + "]");
if (root[r].findKey(queryString, 0, dummy) &&
!root[l - 1].findKey(queryString, 0, dummy))
System.out.println(queryString +
" - exists in above range");
else
System.out.println(queryString +
" - does not exist in above range");
}
}
// This code is contributed by jithin
Output:
All keys in trie after version - 3
gohan
goku
goten
range : [2, 3]
goku - does not exist in above range
range : [2, 4]
goten - exists in above range
时间复杂度:如上所述,我们将在插入时访问 trie 中所有的 X (密钥长度)个节点;因此,我们将访问 X 个状态,并且在每个状态下,我们将通过为新创建的 trie 节点使用当前版本来喜欢上一版本的 sz 子节点,从而完成 O(sz) 数量的工作。因此,插入的时间复杂度变为0(密钥长度* sz) 。但是搜索在要搜索的关键字的长度上仍然是线性的,因此,搜索关键字的时间复杂度仍然是0(关键字的长度),就像标准的 trie 一样。 空间复杂性:显然,数据结构的持久性伴随着空间的交易,我们将消耗更多的内存来维护 trie 的不同版本。现在,让我们想象一下最坏的情况——对于插入,我们正在创建 O(length_of_key) 节点,每个新创建的节点将占用 O(sz) 的空间来存储其子节点。因此,插入上述实现的空间复杂度是0(密钥长度* sz)。
版权属于:月萌API www.moonapi.com,转载请注明出处