Trie,又称字典树或前缀树(prefix tree),是一种树形结构,用于保存大量的字符串。
它的优点是:利用字符串的公共前缀来节约存储空间。查找、插入复杂度为O(n),n为字符串长度。
它有3个基本性质:
1. 根节点不包含字符,除根节点外每一个节点都只包含一个字符。
2. 从根节点到某一节点,路径上经过的字符连接起来,为该节点对应的字符串。
3. 每个节点的所有子节点包含的字符都不相同。
假设有 abc、abcd、abd、b、bcd、efg、hii 这7个单词,可构建字典树如下(原文此处为示意图:图中以红色标出的结点表示有单词在该结点结束):
查找一个字符串时,我们只需从根结点按字符串中字符出现顺序依次往下走。如果到最后字符串结束时,对应的结点标记为红色,则该字符串存在;否则不存在。
插入时也只需从根结点往下遍历,碰到已存在的字符结点就往下遍历,否则,建立新结点;最后标记最后一个字符的结点为红色即可。
实现:
#include <cstddef>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
const int kind=26;// alphabet size: one child slot per lowercase letter 'a'..'z'

// One node of the trie. Every node except the root stands for a single
// character; the characters on the path from the root down to a node spell
// the string that node represents.
struct Treenode
{
    char ch;               // character stored at this node
    bool isColored;        // true when a stored word ends at this node (a "red" node)
    int count;             // number of child nodes
    Treenode *next[kind];  // children, indexed by (character - 'a'); NULL when absent
    Treenode *parent;      // parent node, as passed to the constructor

    // Creates an unmarked node for character `thech` below `par`, with no children.
    Treenode(char thech, Treenode* par)
        : ch(thech), isColored(false), count(0), parent(par)
    {
        for(int i=0;i<kind;i++)
            next[i]=NULL;
    }
};
void insert(Treenode *root,char
*word)//向以root为根结点的树中插入串word
{
Treenode
*location=root;
int
i=0,branch=0;
if(location==NULL) {
location=new Treenode(' ', NULL); //根节点字符为空,用空格(' ')表示
root=location;
}
while(word[i])
{
branch=word[i]-'a';
if(!location->next[branch])
location->next[branch]=new Treenode(word[i],
location);//如果不存在,建新结点
location->count++;
location=location->next[branch];
i++;
}
location->isColored = true; //标记节点为叶节点
}
// Looks up `word` in the trie rooted at `root`.
//
// Returns the node at which `word` ends when that node is marked as the end
// of a stored word. Returns NULL when `root` or `word` is NULL, when the path
// does not exist, when the path exists only as a prefix of longer words, or
// when `word` contains a character outside 'a'..'z'.
Treenode* search(Treenode *root, const char *word)
{
    if(root==NULL || word==NULL) return NULL;

    Treenode *location=root;
    for(const char *p=word; *p; ++p)
    {
        // A character outside the alphabet would index outside next[].
        if(*p<'a' || *p>='a'+kind) return NULL;
        location=location->next[*p-'a'];
        if(location==NULL) return NULL;
    }
    return location->isColored ? location : NULL;
}
// Returns the longest prefix of `word` that exists as a path from `root`,
// whether or not a stored word ends at the last matched node.
//
// Returns NULL when `root` or `word` is NULL or when not even the first
// character matches. Otherwise the result points into a buffer owned by this
// function: it stays valid until the next call, must not be freed by the
// caller, and makes the function non-reentrant.
char* longest_prefix(Treenode *root, const char *word)
{
    // Static storage keeps the returned pointer alive after the call returns;
    // a pointer into a temporary std::string would dangle immediately.
    static std::string buffer;

    if(root==NULL || word==NULL) return NULL;

    Treenode *location=root;
    std::size_t matched=0;
    while(word[matched])
    {
        const char c=word[matched];
        if(c<'a' || c>='a'+kind) break;   // outside the alphabet: cannot be in the trie
        Treenode *child=location->next[c-'a'];
        if(child==NULL) break;
        location=child;
        ++matched;
    }
    if(matched==0)
        return NULL;

    buffer.assign(word, matched);
    return &buffer[0];
}
//获取所有以root为根的(红色)结点,并存放到allElement中
vector getAll(Treenode *root,
char *str, int i, vector
&allElement)
{
str[i] =
root->ch;
if(root->isColored)
{
str[i+1] = '\0';
char *temp = (char*)malloc(strlen(str)*sizeof(char));
strcpy(temp, str+1);
temp[strlen(str)-1]='\0';
allElement.push_back(temp);
}
for(int
j=0;j
{
if(root->next[j]!=NULL)
{
getAll(root->next[j],str,i+1, allElement);
}
}
}
//获取所有以word为前缀的红色结点,并存放到allElement中(不包含前缀,使用时需额外添加)
void autocomplete(Treenode *root, const char *word, char *str, int
i, vector
&allElement)
{
Treenode
*location=root;
int
j=0,branch=0;
if(location==NULL) return ;
while(word[j])
{
branch=word[j]-'a';
if(!location->next[branch]) return ;
location=location->next[branch];
j++;
}
getAll(location, str, i, allElement);
}
// Removes `word` from the trie rooted at `root`.
//
// Clears the end-of-word mark on the node where `word` ends, then deletes
// that node and every ancestor that is left with no children and no mark of
// its own. The root itself is never deleted. Does nothing when `root` or
// `word` is NULL or when `word` is not stored in the trie.
void remove(Treenode *root, const char *word)
{
    if(root==NULL || word==NULL) return;

    // Walk down to the node where `word` ends.
    Treenode *node=root;
    for(const char *p=word; *p; ++p)
    {
        if(*p<'a' || *p>='a'+kind) return;   // outside the alphabet: cannot be stored
        node=node->next[*p-'a'];
        if(node==NULL) return;
    }
    if(!node->isColored) return;   // the path exists, but no word ends here
    node->isColored=false;

    // Prune upwards. Stopping at the root (or at any node without a parent)
    // avoids dereferencing a NULL parent when `word` is empty.
    while(node!=root && node->parent!=NULL && !node->isColored && node->count==0)
    {
        Treenode *parent=node->parent;
        parent->next[node->ch-'a']=NULL;
        if(parent->count>0)
            parent->count--;
        delete node;
        node=parent;
    }
}
// Prints, one per line, the string of every marked ("red") node in the
// subtree rooted at `root`.
//
// str: scratch buffer; must have room for index i plus the depth of the
//      subtree plus one terminator.
// i:   index in `str` at which the character of `root` is written. Output
//      starts at str[1], so with i == 0 the starting node's own character
//      is not printed.
// Does nothing when `root` or `str` is NULL or `i` is negative.
void print(Treenode *root, char *str, int i)
{
    if(root==NULL || str==NULL || i<0) return;

    str[i]=root->ch;
    if(root->isColored)
    {
        str[i+1]='\0';
        std::puts(str+1);
    }
    for(int j=0;j<kind;j++)
    {
        if(root->next[j]!=NULL)
            print(root->next[j], str, i+1);
    }
}
int main()
{
char
word[10];
char
ask[10];
char
str[20];
Treenode
*root=NULL;
cout<<"input the strings to build the
tire:\n";
while(gets(word))
{
if(word[0]=='\0') break;
insert(root,word);
}
vector allElement;
vector::iterator pos;
getAll(root,
str, 0, allElement);
for(pos =
allElement.begin(); pos != allElement.end(); ++pos) {
cout<<"ab"<<*pos<
}
cout<<"所有以ab为前缀的红色结点:\n";
autocomplete(root, "ab", str, 0, allElement);
for(pos =
allElement.begin(); pos != allElement.end(); ++pos) {
cout<<"ab"<<*pos<
}
//print(root, str, 0);
cout<<"abcd的最长前缀: ";
cout<<(char*)longest_prefix(root,
"abcd")<
cout<<"input a string to search:
";
gets(ask);
search(root,ask) == NULL ?
cout<
remove(root,ask);
cout<<"after delete
"<
print(root,
str, 0);
return
0;
}
扩展阅读: