DEV Community

22191030258Yash
22191030258Yash

Posted on

trie data structure

Trie is an efficient information reTrieval data structure. Using Trie, search complexities can be brought to optimal limit (key length). If we store keys in binary search tree, a well balanced BST will need time proportional to M * log N, where M is maximum string length and N is number of keys in tree. Using Trie, we can search the key in O(M) time. However the penalty is on Trie storage requirements (Please refer Applications of Trie for more details)

Every node of Trie consists of multiple branches. Each branch represents a possible character of keys. We need to mark the last node of every key as end of word node. A Trie node field isEndOfWord is used to distinguish the node as end of word node. A simple structure to represent nodes of the English alphabet can be as following,

// Trie node
struct TrieNode
{
struct TrieNode *children[ALPHABET_SIZE];

 // isEndOfWord is true if the node
 // represents end of a word
 bool isEndOfWord;
Enter fullscreen mode Exit fullscreen mode

};

C program:

// C implementation of search and insert operations
// on Trie

include

include

include

include

define ARRAY_SIZE(a) sizeof(a)/sizeof(a[0])

// Alphabet size (# of symbols)

define ALPHABET_SIZE (26)

// Converts key current character into index
// use only 'a' through 'z' and lower case

define CHAR_TO_INDEX(c) ((int)c - (int)'a')

// trie node
struct TrieNode
{
struct TrieNode *children[ALPHABET_SIZE];

// isEndOfWord is true if the node represents
// end of a word
bool isEndOfWord;
Enter fullscreen mode Exit fullscreen mode

};

// Returns new trie node (initialized to NULLs)
struct TrieNode *getNode(void)
{
struct TrieNode *pNode = NULL;

pNode = (struct TrieNode *)malloc(sizeof(struct TrieNode));

if (pNode)
{
    int i;

    pNode->isEndOfWord = false;

    for (i = 0; i < ALPHABET_SIZE; i++)
        pNode->children[i] = NULL;
}

return pNode;
Enter fullscreen mode Exit fullscreen mode

}

// If not present, inserts key into trie
// If the key is prefix of trie node, just marks leaf node
void insert(struct TrieNode *root, const char *key)
{
int level;
int length = strlen(key);
int index;

struct TrieNode *pCrawl = root;

for (level = 0; level < length; level++)
{
    index = CHAR_TO_INDEX(key[level]);
    if (!pCrawl->children[index])
        pCrawl->children[index] = getNode();

    pCrawl = pCrawl->children[index];
}

// mark last node as leaf
pCrawl->isEndOfWord = true;
Enter fullscreen mode Exit fullscreen mode

}

// Returns true if key presents in trie, else false
bool search(struct TrieNode *root, const char *key)
{
int level;
int length = strlen(key);
int index;
struct TrieNode *pCrawl = root;

for (level = 0; level < length; level++)
{
    index = CHAR_TO_INDEX(key[level]);

    if (!pCrawl->children[index])
        return false;

    pCrawl = pCrawl->children[index];
}

return (pCrawl != NULL && pCrawl->isEndOfWord);
Enter fullscreen mode Exit fullscreen mode

}

// Driver
int main()
{
// Input keys (use only 'a' through 'z' and lower case)
char keys[][8] = {"the", "a", "there", "answer", "any",
"by", "bye", "their"};

char output[][32] = {"Not present in trie", "Present in trie"};


struct TrieNode *root = getNode();

// Construct trie
int i;
for (i = 0; i < ARRAY_SIZE(keys); i++)
    insert(root, keys[i]);

// Search for different keys
printf("%s --- %s\n", "the", output[search(root, "the")] );
printf("%s --- %s\n", "these", output[search(root, "these")] );
printf("%s --- %s\n", "their", output[search(root, "their")] );
printf("%s --- %s\n", "thaw", output[search(root, "thaw")] );

return 0;
Enter fullscreen mode Exit fullscreen mode

}

Top comments (0)