Resources
- Trie Overview
- Trie Overview + Implementation
- Trie Operations + Implementations
- In Depth Trie Explanation
- A good implementation of the delete operation
(Also worth mentioning is this article from the Visual Studio Magazine. The article explains tries and why you might want to use them.)
Takeaways:
- A trie is an ordered tree that compactly stores strings. Each node in a trie represents a single character in a string. Say we store the words "car" and "cars" in a trie - the trie would only use four nodes for both - one for each distinct letter of the words (c,a,r,s).
- They are also known as digital trees and prefix trees.
- A trie is most efficient when it is storing strings with similar prefixes, but in general tries are usually not space efficient. They have a space complexity of
O(n + k)
. - They are useful for queries involving string prefixes, such as suggesting the next character(s) for a given prefix.
- Tries are not usually found in standard libraries of popular languages (Java & C#, for example, do not have a trie data structure).
- Tries are typically implemented using arrays or hash tables.
- Although hash table implementations are more flexible, array implementations can be more efficient by placing limitations on the trie (like only allowing characters a-z)
- Many trie implementations will use booleans to mark the end of words/strings. But some implementations will use an additional node appended after the last character of the word. This additional node might contain a
NULL
value or a special character. - Similarly, the root/head node of a trie can be represented in various ways. For my implementations I used a special character (
$
) to denote the root node. - Inserting and searching are both
O(k)
operations. - A related data structure is a radix tree
- Radix trees are basically a space efficient trie. A trie only stores one character per node, whereas a radix tree can store many. A radix tree is essentially a trie where all nodes that are the only child are merged with their parent nodes - this means less total nodes are required for the same number of words.
Below you will find two trie implementations:
using System; | |
using System.Collections.Generic; | |
public class Program | |
{ | |
public class TrieNode | |
{ | |
public char Character { get; set; } | |
// IsLeaf/IsCompleteWord | |
public bool IsEndOfWord { get; set; } | |
public Dictionary<char, TrieNode> Children { get; set; } | |
public TrieNode() | |
{ | |
Children = new Dictionary<char, TrieNode>(); | |
} | |
public TrieNode(char character) | |
{ | |
Character = character; | |
Children = new Dictionary<char, TrieNode>(); | |
} | |
} | |
public class Trie | |
{ | |
private const char RootChar = '$'; | |
private readonly TrieNode Root; | |
public Trie() | |
{ | |
Root = new TrieNode(RootChar); | |
} | |
public void Insert(string word) | |
{ | |
TrieNode temp = Root; | |
for (int i = 0; i < word.Length; i++) | |
{ | |
var currentChar = word[i]; | |
if (!temp.Children.ContainsKey(currentChar)) | |
{ | |
temp.Children.Add(currentChar, new TrieNode(currentChar)); | |
} | |
temp = temp.Children[currentChar]; | |
} | |
temp.IsEndOfWord = true; | |
} | |
public bool Contains(string word) | |
{ | |
var temp = Root; | |
for (int i = 0; i < word.Length; i++) | |
{ | |
var currentChar = word[i]; | |
if (!temp.Children.ContainsKey(currentChar)) | |
{ | |
return false; | |
} | |
temp = temp.Children[currentChar]; | |
} | |
// temp != null && temp.IsEndOfWord; | |
return temp?.IsEndOfWord ?? false; | |
} | |
public void Delete(string word) | |
{ | |
DoDelete(Root, word, 0); | |
} | |
private void DoDelete(TrieNode node, string word, int index) | |
{ | |
if (index >= word.Length) | |
{ | |
return; | |
} | |
var currentChar = word[index]; | |
node.Children.TryGetValue(currentChar, out TrieNode nextNode); | |
if (nextNode == null) | |
{ | |
return; | |
} | |
DoDelete(nextNode, word, index + 1); | |
if (index == word.Length - 1) | |
{ | |
nextNode.IsEndOfWord = false; | |
} | |
if (nextNode != null | |
&& !nextNode.IsEndOfWord | |
&& HasNoChildren(nextNode)) | |
{ | |
node.Children.Remove(currentChar); | |
} | |
} | |
public char[] SuggestNextCharacters(string partialWord) | |
{ | |
TrieNode lastCharNode = Root; | |
for (int i = 0; i < partialWord.Length; i++) | |
{ | |
var currentChar = partialWord[i]; | |
if (!lastCharNode.Children.ContainsKey(currentChar)) | |
{ | |
lastCharNode = null; | |
break; | |
} | |
lastCharNode = lastCharNode.Children[currentChar]; | |
} | |
// lastCharNode.Children.Keys.ToArray() if using System.Linq | |
char[] nextChars = new char[lastCharNode.Children.Keys.Count]; | |
lastCharNode.Children.Keys.CopyTo(nextChars, 0); | |
return nextChars; | |
} | |
private bool HasNoChildren(TrieNode node) | |
{ | |
return node.Children.Count == 0; | |
} | |
} | |
// only allows a-z | |
public class TrieNode2 | |
{ | |
private const int MaxChars = 26; | |
public char Character { get; set; } | |
// IsLeaf/IsCompleteWord | |
public bool IsEndOfWord { get; set; } | |
public TrieNode2[] Children { get; set; } | |
public TrieNode2() | |
{ | |
Children = new TrieNode2[MaxChars]; | |
} | |
public TrieNode2(char character) | |
{ | |
Character = character; | |
Children = new TrieNode2[MaxChars]; | |
} | |
} | |
public class Trie2 | |
{ | |
private const char RootChar = '$'; | |
private readonly TrieNode2 Root; | |
public Trie2() | |
{ | |
Root = new TrieNode2(RootChar); | |
} | |
public void Insert(string word) | |
{ | |
TrieNode2 temp = Root; | |
for (int i = 0; i < word.Length; i++) | |
{ | |
// We do this to map a-z to 0-25, to fit in our 26 index array | |
// 'a' has a unicode value of 97, 'b' is 98, 'z' is 122. | |
// So after subtracting 97 ('a'), the characters map like so: | |
// 'a' to 0, 'b' to 1, 'z' to 25 etc. | |
var index = word[i] - 'a'; | |
if (temp.Children[index] == null) | |
{ | |
var currentChar = word[i]; | |
temp.Children[index] = new TrieNode2(currentChar); | |
} | |
temp = temp.Children[index]; | |
} | |
temp.IsEndOfWord = true; | |
} | |
public bool Contains(string word) | |
{ | |
var temp = Root; | |
for (int i = 0; i < word.Length; i++) | |
{ | |
var index = word[i] - 'a'; | |
if (temp.Children[index] == null) | |
{ | |
return false; | |
} | |
temp = temp.Children[index]; | |
} | |
// temp != null && temp.IsEndOfWord; | |
return temp?.IsEndOfWord ?? false; | |
} | |
public void Delete(string word) | |
{ | |
DoDelete(Root, word, 0); | |
} | |
private void DoDelete(TrieNode2 node, string word, int index) | |
{ | |
if (index >= word.Length) | |
{ | |
return; | |
} | |
var calcIndex = word[index] - 'a'; | |
var nextNode = node.Children[calcIndex]; | |
if (nextNode == null) | |
{ | |
return; | |
} | |
DoDelete(nextNode, word, index + 1); | |
if (index == word.Length - 1) | |
{ | |
nextNode.IsEndOfWord = false; | |
} | |
if (nextNode != null | |
&& !nextNode.IsEndOfWord | |
&& HasNoChildren(nextNode)) | |
{ | |
node.Children[calcIndex] = null; | |
} | |
} | |
public char[] SuggestNextCharacters(string partialWord) | |
{ | |
TrieNode2 lastCharNode = Root; | |
for (int i = 0; i < partialWord.Length; i++) | |
{ | |
var index = partialWord[i] - 'a'; | |
if (lastCharNode.Children[index] == null) | |
{ | |
lastCharNode = null; | |
break; | |
} | |
lastCharNode = lastCharNode.Children[index]; | |
} | |
// lastCharNode.Children.Keys.ToArray() if using System.Linq | |
char[] nextChars = new char[lastCharNode.Children.Length]; | |
for (int i = 0; i < lastCharNode.Children.Length; i++) | |
{ | |
if (lastCharNode.Children[i] != null) | |
{ | |
nextChars[i] = lastCharNode.Children[i].Character; | |
} | |
} | |
return nextChars; | |
} | |
private bool HasNoChildren(TrieNode2 node) | |
{ | |
for (int i = 0; i < node.Children.Length; i++) | |
{ | |
if (node.Children[i] != null) | |
{ | |
return false; | |
} | |
} | |
return true; | |
} | |
} | |
public static void Main() | |
{ | |
var t1 = new Trie(); | |
var t2 = new Trie2(); | |
t1.Insert("maria"); | |
t1.Insert("mariana"); | |
t2.Insert("maria"); | |
t2.Insert("mariana"); | |
t1.SuggestNextCharacters("mar"); | |
t2.SuggestNextCharacters("mar"); | |
Console.WriteLine(t1.Contains("ana")); | |
Console.WriteLine(t2.Contains("ana")); | |
Console.WriteLine(t1.Contains("mariana")); | |
Console.WriteLine(t2.Contains("mariana")); | |
t1.Delete("mariana"); | |
t2.Delete("mariana"); | |
Console.WriteLine(t1.Contains("ana")); | |
Console.WriteLine(t2.Contains("ana")); | |
Console.WriteLine(t1.Contains("mariana")); | |
Console.WriteLine(t2.Contains("mariana")); | |
} | |
} |
As always, if you found any errors in this post please let me know!
Top comments (0)