Trie Pattern Matching Examples

From CodeCodex

Implementations[edit]

Java[edit]

package com.superliminal.util;


/**
 * Implements very fast dictionary storage and retrieval.
 * Only depends upon the core String class.
 * 
 * @author Melinda Green - © 2010 Superliminal Software.
 * Free for all uses with attribution.
 */
public class TrieMap {
    /*
     * Implementation of a trie tree. (see http://en.wikipedia.org/wiki/Trie)
     * though I made it faster and more compact for long key strings 
     * by building tree nodes only as needed to resolve collisions.
     * Each letter of a key is the index into the following array.
     * Values stored in the array are either a Leaf containing the user's value or
     * another TrieMap node if more than one key shares the key prefix up to that point.
     * Null elements indicate unused, I.E. available slots.
     */
    private Object[] mChars = new Object[256];
    private Object mPrefixVal; // Used only for values of prefix keys.
    
    // Simple container for a string-value pair.
    private static class Leaf {
        public String mStr;
        public Object mVal;
        public Leaf(String str, Object val) {
            mStr = str;
            mVal = val;
        }
    }
    
    public TrieMap() {
    }

    public boolean isEmpty() {
        if(mPrefixVal != null) {
            return false;
        }
        for(Object o : mChars) {
            if(o != null) {
                return false;
            }
        }
        return true;
    }
    
    
    /**
     * Inserts a key/value pair.
     * 
     * @param key may be empty or contain low-order chars 0..255 but must not be null.
     * @param val Your data. Any data class except another TrieMap. Null values erase entries.
     */
    public void put(String key, Object val) {
        assert key != null;
        assert !(val instanceof TrieMap); // Only we get to store TrieMap nodes. TODO: Allow it.
        if(key.length() == 0) {
            // All of the original key's chars have been nibbled away 
            // which means this node will store this key as a prefix of other keys.
            mPrefixVal = val; // Note: possibly removes or updates an item.
            return;
        }
        char c = key.charAt(0);
        Object cObj = mChars[c];
        if(cObj == null) { // Unused slot means no collision so just store and return;
            if(val == null) {
                return; // Don't create a leaf to store a null value.
            }
            mChars[c] = new Leaf(key, val);
            return;
        }
        if(cObj instanceof TrieMap) {
            // Collided with an existing sub-branch so nibble a char and recurse.
            TrieMap childTrie = (TrieMap)cObj;
            childTrie.put(key.substring(1), val);
            if(val == null && childTrie.isEmpty()) {
                mChars[c] = null; // put() must have erased final entry so prune branch.
            }
            return;
        }
        // Collided with a leaf 
        if(val == null) {
            mChars[c] = null; // Null value means to remove any previously stored value.
            return;
        }
        assert cObj instanceof Leaf;
        // Sprout a new branch to hold the colliding items.
        Leaf cLeaf = (Leaf)cObj;
        TrieMap branch = new TrieMap();
        branch.put(key.substring(1), val); // Store new value in new subtree.
        branch.put(cLeaf.mStr.substring(1), cLeaf.mVal); // Plus the one we collided with.
        mChars[c] = branch;
    }


    /**
     * Retrieve a value for a given key or null if not found.
     */
    public Object get(String key) {
        assert key != null;
        if(key.length() == 0) {
            // All of the original key's chars have been nibbled away 
            // which means this key is a prefix of another.
            return mPrefixVal;
        }
        char c = key.charAt(0);
        Object cVal = mChars[c];
        if(cVal == null) {
            return null; // Not found.
        }
        assert cVal instanceof Leaf || cVal instanceof TrieMap;
        if(cVal instanceof TrieMap) { // Hash collision. Nibble first char, and recurse.
            return ((TrieMap)cVal).get(key.substring(1));
        }
        if(cVal instanceof Leaf) {
            // cVal contains a user datum, but does the key match its substring?
            Leaf cPair = (Leaf)cVal;
            if(key.equals(cPair.mStr)) {
                return cPair.mVal; // Return user's data value.
            }
        }
        return null; // Not found.
    }

    
    /**
     * Simple example test program.
     */
    public static void main(String[] args) {
        // Insert a bunch of key/value pairs.
        TrieMap trieMap = new TrieMap();
        trieMap.put("123", "456");
        trieMap.put("Java", "rocks");
        trieMap.put("Melinda", "too");
        trieMap.put("Moo", "cow"); // Will collide with "Melinda".
        trieMap.put("Moon", "walk"); // Collides with "Melinda" and turns "Moo" into a prefix.
        trieMap.put("", "Root"); // You can store one value at the empty key if you like.
        
        // Test for inserted, nonexistent, and deleted keys.
        System.out.println("123 = " + trieMap.get("123"));
        System.out.println("Java = " + trieMap.get("Java"));
        System.out.println("Melinda = " + trieMap.get("Melinda"));
        System.out.println("Moo = " + trieMap.get("Moo"));
        System.out.println("Moon = " + trieMap.get("Moon"));
        System.out.println("Mo = " + trieMap.get("Mo")); // Should return null.
        System.out.println("Empty key = " + trieMap.get("")); // Should return "Root".
        System.out.println("Moose = " + trieMap.get("Moose")); // Never added so should return null.
        System.out.println("Nothing = " + trieMap.get("Nothing")); // Ditto.
        trieMap.put("123", null); // Removes this leaf entry.
        System.out.println("After removal, 123 = " + trieMap.get("123")); // Should now return null.
        trieMap.put("Moo", null); // Removes this prefix entry. (Special case to test internal logic).
        System.out.println("After removal, Moo = " + trieMap.get("Moo")); // Should now return null.
        trieMap.put("Moon", null); // Internal test of branch pruning.
    }
}

Python[edit]

# Number of children in the trie, determined by size of alphabet
ALPHABET_SIZE = 26

class Node:
 """ Single node to be used in the trie.
 
 Can include various information like if the node is end of a word,
 number of words with prefix ending eith this node (starting from 
 head) etc.
 Here only a boolean value is_end to signify if the node is end of a
 word and a integer prefix_count to count number of words with given
 prefix (ending at this node from head) is used.
 """
 def __init__(self, is_end = False):
  """ Initialize the node with default value of is_end False """
  
  self.is_end = is_end
  self.prefix_count = 0
  self.children = [None for child in range(ALPHABET_SIZE)]

class Trie:
 """Class supporting build, insert, search and count of words with
 given prefix in Trie data structure. 
 
 >>> t = Trie()
 
 >>> t.insert('apple')
 
 >>> t.insert('banana')
 
 >>> t.insert('applet')
 
 >>> t.search('apple')
 True
 
 >>> t.search('app')
 False
 
 >>> t.search('bananaa')
 False
 
 >>> t.search('')
 False
 
 >>> t.count_words_with_prefix('app')
 2
 
 >>> t.insert('apple')
 
 >>> t.count_words_with_prefix('app')
 3
 
 >>> t.count_words_with_prefix('applet')
 1
 
 >>> t.count_words_with_prefix('BAN')
 
 >>> t.count_words_with_prefix('illegal')
 
 """
 
 def __init__(self):
  """ Initialize the Trie with a dummy node head """
  self.head = Node()
 
 def insert(self, word):
  """ Insert a word in the trie """
  current = self.head
  for letter in word:
   int_value = ord(letter) - ord('a')
   try:
    assert(0 <= int_value < 26)
   except Exception:
    raise Exception('Invalid Word', 
        'Latin small alphabets required')
   if current.children[int_value] is None:
    current.children[int_value] = Node()
   current = current.children[int_value]
   current.prefix_count += 1
  current.is_end = True
 
 def search(self, word):
  """ Search for given word in trie.return true if found and false
  if word is not in trie or is invalid (does not consist of latin small
  characters only).
  """
  current = self.head
  for letter in word:
   int_value = ord(letter) - ord('a')
   try:
    assert(0 <= int_value < 26)
   except Exception:
    return False
   if current.children[int_value] is None:
    return False
   else:
    current = current.children[int_value]
  return current.is_end
 
 def count_words_with_prefix(self, word):
  """ return number of words in the trie with have the given 
  argument as prefix or None if the word is not present in the
  trie or the word is invalid (does not consist of latin small
  characters only).
  """
  current = self.head
  for letter in word:
   int_value = ord(letter) - ord('a')
   try:
    assert(0 <= int_value < 26)
   except Exception:
    return None
   if current.children[int_value] is None:
    return None
   else:
    current = current.children[int_value]
  return current.prefix_count


if __name__ == "__main__":
 import doctest
 doctest.testmod()

Ruby[edit]

require 'dictionary'

describe Dictionary do
  before do
    @d = Dictionary.new
  end
  
  it "should be empty when created" do
    @d.words.should == []
  end

  it "should not include a word in an empty dictionary" do
    @d.include?('fish').should be_false
  end

  it "should not find a word in an empty dictionary" do
    @d.find('fi').should == []
  end

  it "should be able to add a word and check for inclusion" do
    @d.add('fish')
    @d.include?('fish').should be_true
  end

  it "should report its contents" do 
    @d.add("fish")
    @d.add("foul")
    @d.words.sort.should == ["fish", "foul"].sort
  end

  it "should report its contents even if one word is a substring of another" do 
    @d.add("cat")
    @d.add("finger")
    @d.add("fin")
    @d.words.sort.should == ["cat", "fin", "finger"].sort
  end

  it "should be able to add words in any order" do 
    @d.add("fin")
    @d.add("cat")
    @d.add("finger")
    @d.words.sort.should == ["cat", "fin", "finger"].sort
  end

  it "should not include a prefix that wasn't added as a word in and of itself" do
    @d.add('fish')
    @d.include?('fi').should be_false
  end

  it "should only consider non-empty strings as words" do
    @d.add('').should be_false
    @d.add(nil).should be_false
    @d.add(3).should be_false
    @d.add({}).should be_false
  end

  it "should find nothing if the prefix matches nothing" do
    @d.add('fiend')
    @d.add('great')
    @d.find('nothing').should == []
  end

  it "should find a word from a prefix" do
    @d.add('fish')
    @d.add('fiend')
    @d.add('great')
    @d.find('gr').should == ['great']
  end

  it "should find multiple matches from a prefix" do
    @d.add('fish')
    @d.add('fiend')
    @d.add('great')
    @d.find('fi').sort.should == ['fish', 'fiend'].sort
  end
  
  it "should find correct matches and disregard partial matches" do
    @d.add('fan')
    @d.add('fish')
    @d.add('fishy')
    @d.add('fiend')
    @d.add('great')
    @d.find('fi').sort.should == ['fish', 'fishy', 'fiend'].sort
  end
  
  it "should find correct matches even if one word is a substring of another" do
    @d.add('fishy')
    @d.add('fish')
    @d.find('fi').sort.should == ['fish', 'fishy'].sort
  end
  
end

Lisp[edit]

(defpackage :trie 
  (:use 
   :common-lisp) 
  (:export 
   :make-trie 
   :insert-trie 
   :search-trie) 
  (:documentation 
   "Library of ternary trees, inspired by Bentley & Sedgewick 
article.")) 
(in-package :trie) 
(defun make-trie () 
  "Return a trie object, an array, defined as: 
  0 = split-char 
  1 = lo-kid 
  2 = eq-kid 
  3 = hi-kid" 
  (make-array '(4) :element-type 'char :initial-contents '(nil nil nil 
nil))) 
(defun get-trie-val (trie index) 
  "Access the trie value defined by index: 
  0 = split-char 
  1 = lo-kid 
  2 = eq-kid 
  3 = hi-kid" 
  (aref trie index)) 
(defun get-split-char (trie) 
  (get-trie-val trie 0)) 
(defun get-lo-kid (trie) 
  (get-trie-val trie 1)) 
(defun get-eq-kid (trie) 
  (get-trie-val trie 2)) 
(defun get-hi-kid (trie) 
  (get-trie-val trie 3)) 
(defun set-trie-val (trie index val) 
  "Set the trie value defined by index: 
  0 = split-char 
  1 = lo-kid 
  2 = eq-kid 
  3 = hi-kid" 
  (setf (aref trie index) val)) 
(defun set-split-char (trie val) 
  (set-trie-val trie 0 val)) 
(defun set-lo-kid (trie val) 
  (set-trie-val trie 1 val)) 
(defun set-eq-kid (trie val) 
  (set-trie-val trie 2 val)) 
(defun set-hi-kid (trie val) 
  (set-trie-val trie 3 val)) 
(defun trie-empty (trie) 
  "A trie is empty if its split-char value is undefined." 
  (null (get-split-char trie))) 
(defun insert-trie (trie str) 
  "Add the string to the trie." 
  (when (> (length str) 0) 
    (let ((c (char-downcase (char str 0)))) 
      (when (trie-empty trie) 
        (setf trie (make-trie)) 
        (set-split-char trie c) 
        (set-lo-kid trie (make-trie)) 
        (set-eq-kid trie (make-trie)) 
        (set-hi-kid trie (make-trie))) 
      (cond ((char< c (get-split-char trie)) 
             (set-lo-kid trie (insert-trie (get-lo-kid trie) str))) 
            ((char= c (get-split-char trie)) 
             (set-eq-kid trie (insert-trie (get-eq-kid trie) (subseq 
str 1)))) 
            (t 
             (set-hi-kid trie (insert-trie (get-hi-kid trie) str)))))) 
  trie) 
(defun search-trie (trie str) 
  "Return t if the string exists in the trie, nil if it does not." 
  (if (> (length str) 0) 
    (let ((c (char-downcase (char str 0)))) 
      (cond ((null (get-split-char trie)) 
             nil) ; run off end w/o match 
            ((char< c (get-split-char trie)) 
             (search-trie (get-lo-kid trie) str)) 
            ((char> c (get-split-char trie)) 
             (search-trie (get-hi-kid trie) str)) 
            (t 
             (search-trie (get-eq-kid trie) (subseq str 1))))) 
    t)) 

ActionScript 3[edit]

package
{
   public class TNode
   {
      // the array of 4 children of this node
      public var cn:Vector.<TNode> = new Vector.<TNode>(4, true);
      public var c:String;   // character stored in the node
      public var next:TNode; // pointer to the next node (in linked list)
      public var isEnd:Boolean = false; // whether there is the end of word
   
      public function TNode(c:String):void
      {
         this.c = c; // assign the parameter to "c"
      }
   }
}

Searching a child for a character Since children are in fixed-length array + in linked lists, it is not so easy to get them, so we write this method:

public function GetChild(s:String):TNode
{
   // at first, take a look at the array "cn"
   var n:TNode = cn[ s.charCodeAt(0)% 4 ];
   if ( n == null) return null; // there is no child for "s"
   else    // there is some child, we loop through the linked list
      while (n.next != null)  
      {
         if (n.c == s) return n;
         n = n.next;
      }
   if (n.c == s) return n; // it was at the end of LL
   return null; // it wasn't anywhere
}

Adding a word into a Trie

public function AddWord(s:String):void
{
   // look for child for this char
   var n:TNode = GetChild(s.charAt(0));
   if(n==null) // there is no child
   {
      n = new TNode(s.charAt(0)); // we chreate one
      var pos:int = s.charCodeAt(0)% 4; // where should it be
      if(cn[pos] == null) cn[pos] = n;  // free position
      else // not free - add it to LL
      {
         var no:TNode = cn[ pos ];
         while(no.next != null) no = no.next;
         no.next = n;
      }
   }
   // add the rest of the word to it or set the flag
   if(s.length>1) n.AddWord(s.substring(1,s.length));
   else n.isEnd = true; // end of the word
}

Does Trie contain the word

public function Contains(s:String):Boolean
{
   // at the end of recursion we return isEnd
   if(s.length == 0) return isEnd;
   // we look at the proper child
   var n:TNode = GetChild(s.charAt(0));
   if(n==null) return false; // no such child
   else return n.Contains(s.substring(1,s.length)); // ask the child
}

Drawing a trie

function drawTrie(n:TNode, x:int, y:int, w:int, place:Sprite)
{
   var i:int;
   // at first, we add chilren to array
   var nodes:Array = new Array();
   for(i = 0; i<4; i++)
   {
      var no:TNode = n.cn[i]; if(no == null) continue;
      while(no != null){ nodes.push(no); no = no.next;}
   }
   var sW:int = w/nodes.length; // width for each node
   for (i = 0; i < nodes.length; i++)
   {
      with(place.graphics)  // line to child
      {
         lineStyle(3,0x00E1FF);
         moveTo(x, y);
         lineTo(x-w/2 + i*sW + sW/2, y+60);
      }
      // drawing a child
      drawTrie(nodes[i], x-w/2 + i*sW + sW/2, y+60, sW, place); 
   }
 
   // draw a round for this node
   with(place.graphics)
   {
      if(n.isEnd) lineStyle(4, 0x000000); // když je koncový
      else lineStyle(2, 0x333333);  // když není - tenčí okraj
      beginFill(0x6D00D9);
      drawEllipse(x-14, y-14, 28, 28);
      endFill();
   }
  
   // add text field with character
   var tf:TextField = new TextField();
   tf.text = n.c; tf.x = x-tf.textWidth; tf.y = y-14;
   place.addChild(tf);
}