Knuth-Morris-Pratt Algorithm Examples

From CodeCodex

The Knuth-Morris-Pratt algorithm for string matching runs in O(m + n) time, where m is the length of the pattern and n is the length of the text to be searched. Compare this with the brute-force search time of O(mn) or the Boyer-Moore algorithm worst-case time of O(m + n).

Implementations[edit]

C[edit]

 #include <stdio.h>
 #include <stdlib.h>
 
 const char *kmp_search(const char *text, const char *pattern)
 {
     int *T;
     int i, j;
     const char *result = NULL;
 
     if (pattern[0] == '\0')
       return text;
 
     /* Construct the lookup table */
     T = malloc((strlen(pattern)+1) * sizeof *T);
     T[0] = -1;
     for (i=0; pattern[i] != '\0'; i++) {
         T[i+1] = T[i] + 1;
         while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1])
           T[i+1] = T[T[i+1]-1] + 1;
     }
 
     /* Perform the search */
     for (i=j=0; text[i] != '\0'; ) {
         if (j < 0 || text[i] == pattern[j]) {
             ++i, ++j;
             if (pattern[j] == '\0') {
                 result = text+i-j;
                 break;
             }
         }
         else j = T[j];
     }
 
     free(T);
     return result;
 }

C#[edit]

/// <summary>
/// Submitted by Zachary Haberman, CWU.
/// </summary>
/// <param name="P">The pattern you are trying to match</param>
/// <param name="T">The Text you are searching through</param>
void kmp_search(string P, string T) {
   int n = T.Length;
   int m = P.Length;
   int[] pi = computePrefixFunction(P);
   int q = 0;

for (int i = 1; i <= n; i++) {
   while (q > 0 && P[q] != T[i - 1]) {
      q = pi[q-1];
   }
   if (P[q] == T[i - 1]) { q++; }
   if (q == m) {
      //Record a match was found here
      q = pi[q-1];
   }
}
}

/// <summary>
/// Submitted by Zachary Haberman, CWU.
/// </summary>
/// <param name="P">The pattern P</param>
int[] computePrefixFunction(string P) {
   int m = P.Length;
   int[] pi = new int[m];
   int k = 0;
   pi[0] = 0;

   for (int q = 1; q < m; q++) {
      while (k > 0 && P[k] != P[q]) { k = pi[k]; }
   
      if (P[k] == P[q]) { k++; }
      pi[q] = k;
   }
   return pi;
}

Java[edit]

 int searchKMP(char[] w, char[] s, int[] t)
 {
  int m=0;
  int i=0;
  while( ((m + i) < s.length) && (i<w.length) )
  {
   if(s[m+i] == w[i]) i++;
   else
   {
    m += i - t[i];
    if (i > 0) i = t[i];
    i++;
   }
  }
  if(i == w.length) return m;
  else return -1;
 }

Javascript[edit]

  function kmp_search(s, w) {
      var m = 0, i = 0, 
          pos, cnd, t,
          slen = s.length,
          wlen = w.length;
      
      /* String to array conversion */
      s = s.split("");
      w = w.split("");    
              
      /* Construct the lookup table */
      t = [-1, 0];
      for ( pos = 2, cnd = 0; pos < wlen; ) {
          if ( w[pos-1] === w[cnd] ) {
              t[pos] = cnd + 1;
              pos++; cnd++;
          }
          else if ( cnd > 0 )
            cnd = t[cnd];
          else 
            t[pos++] = 0;
      } 
      
      /* Perform the search */
      while ( m + i < slen ) {
          if ( s[m+i] === w[i] ) {
              i++;
              if ( i === wlen ) 
                return m;
          }
          else {
              m += i - t[i];
              if ( t[i] > -1 ) 
                  i = t[i];
              else
                  i = 0;
          }
      }
      return -1;
  }

Ada (83 and 95)[edit]

-- pattern_match_knuth_morris_pratt_test.adb an implementation for fixed strings

-- Written by Wikibob, 2004, from notes on the Knuth_Morris_Pratt pattern match algorithm
-- adapted to fixed strings of characters.
-- It is in the public domain.

-- If you are using GNAT, use gnatmake to compile and link this program.
-- To use the pattern match functions in your own software extract
-- the inner package's specification and body into separate files.

-- This program is self-contained and demonstrates a particular
-- implementation of the Knuth_Morris_Pratt algorithm applied to
-- fixed strings, with the following restrictions:
-- * the search pattern is limited to a maximum of 256 characters
-- * the caller must first call the function Pre_Compute on a pattern
--   to obtain a context variable containing the pre-computed pattern.
--   There is no limit to the number of contexts.
-- * the caller must handle the exception Pattern_Error that will
--   be raised if function Find_Location was unable to find the
--   pattern in the given string.

-- Suggested improvements to the inner package are:
-- * add type Result_T is record Location : Index; Found : Boolean; end record;
--   and use it instead of raising Pattern_Error.
-- * produce a version that dispenses with the Context and has Find_Location
--   perform the Pre_process internally.

-- References: http://ww0.java3.datastructures.net/handouts/PatternMatching.pdf

procedure Pattern_Match_Knuth_Morris_Pratt_Fixed_Test is
  -- You may extract this spec into file pattern_match.ads
  package Pattern_Match is
    Max_Pattern_Length : constant Positive := 256;
    type Context is private;
    function Pre_Compute (Pattern : in String) return Context;
    -- precomputes the table of skips for the Pattern.
    function Find_Location (Of_Context : in Context;
                            In_Text    : in String) return Positive;
    Pattern_Error : exception;
    -- alternative is return Natural and use 0 to mean not found.
  private
    subtype Pattern_Length_T is Positive range 1..Max_Pattern_Length;
    type Failure_Function_T is array (Pattern_Length_T) of Positive;
    subtype Slided_Pattern_T is String (1 .. Max_Pattern_Length);
    type Context is record
      Failure_Function : Failure_Function_T;
      M_Pattern        : Slided_Pattern_T;
      Pattern_Length   : Positive;
    end record;
  end Pattern_Match;
  
  -- Variables and data for testing.
  IFPLID_Context : Pattern_Match.Context;
  SRC_Context    : Pattern_Match.Context;
  Text_Test1 : constant String := "IMCHG DLH5877 -BEGIN ADDR -IFPLID AT05428113 -SRC FPL -RFL F330";
  Text_Test2 : constant String := "IMCHG DLH5877 EDDKCLHD -BEGIN ADDR -FAC CFMUTACT AA05428113 FPL -STAR WLD5M -SRC ";
  IFPLID_Pos   : Positive;
  IFPLID_Pos_2 : Positive := 1;
  SRC_Pos      : Positive;
  SRC_Pos_2    : Positive;
  
  -- You may extract this spec into file pattern_match.adb
  package body Pattern_Match is
    function Pre_Compute (Pattern : in String) return Context is
      I, J : Positive;
      Pattern_Context : Context;
    begin
      if Pattern = "" then
        raise Pattern_Error;
      end if;
      Pattern_Context.M_Pattern (1..Pattern'Length) := Pattern;
      Pattern_Context.Pattern_Length := Pattern'Length;
      Pattern_Context.Failure_Function (1) := 1;
      I := 2;
      J := 1;
      while I <= Pattern_Context.Pattern_Length loop
        if Pattern (I) = Pattern (J) then
          -- we have matched J + 1 chars.
          Pattern_Context.Failure_Function (I) := J + 1;
          I := I + 1;
          J := J + 1;
        elsif J > 1 then
          -- use failure function to shift Pattern
          J := Pattern_Context.Failure_Function (J - 1);
        else
          Pattern_Context.Failure_Function (I) := 1;
          I := I + 1;
        end if;
      end loop;
      return Pattern_Context;
    end Pre_Compute;
    
    function Find_Location (Of_Context : in Context;
                            In_Text    : in String) return Positive is
      subtype Slided_Text_T is String (1 .. In_Text'Length);
      Slided_Text : constant Slided_Text_T := Slided_Text_T (In_Text);
      I, J : Positive;
    begin
      I := 1;
      J := 1;
      while I <= Slided_Text'Last loop
        if Slided_Text (I) = Of_Context.M_Pattern (J) then
          if J = Of_Context.Pattern_Length then
            return I - J + 1;
          else
            I := I + 1;
            J := J + 1;
          end if;
        elsif J > 1 then
          J := Of_Context.Failure_Function (J - 1);
        else
          I := I + 1;
        end if;
      end loop;
      raise Pattern_Error;
      -- Or change function to return Natural and return 0.
    end Find_Location;
  end Pattern_Match;
  
  -- You may extract the rest of this file into file pattern_match_test.adb
  -- and modify accordingly.
  
  procedure Check_Pattern_Found (Pattern     : in String;
                                 At_Location : in Positive;
                                 In_Text     : in String) is
    subtype Slided_Text_T is String (1 .. Pattern'Length);
    Slided_Pattern : constant Slided_Text_T := Slided_Text_T (Pattern);
  begin
    if At_Location > In_Text'Last or else
      At_Location + Pattern'Length - 1 > In_Text'Last or else
       Slided_Text_T (In_Text (At_Location .. At_Location + Pattern'Length - 1)) /= Slided_Pattern
    then
      -- We expected Find_Location to return the location of the pattern, as it did not there is a program error.
      raise Program_Error;
    end if;
  end Check_Pattern_Found;

begin
  IFPLID_Context := Pattern_Match.Pre_Compute ("-IFPLID ");
  SRC_Context    := Pattern_Match.Pre_Compute ("-SRC ");
  
  Expect_Pattern_Found:
  begin
    IFPLID_Pos := Pattern_Match.Find_Location (Of_Context => IFPLID_Context,
                                               In_Text    => Text_Test1);
  exception
    when Pattern_Match.Pattern_Error =>
      -- We expected Find_Location to find the pattern, but it did not so there is a program error.
      raise Program_Error;
  end Expect_Pattern_Found;
  Check_Pattern_Found (Pattern     => "-IFPLID ",
                       At_Location => IFPLID_Pos,
                       In_Text     => Text_Test1);
    
  Expect_Pattern_Not_Found:
  begin
    IFPLID_Pos_2 := Pattern_Match.Find_Location (Of_Context => IFPLID_Context,
                                                 In_Text    => Text_Test2);
    -- We expected Find_Location to NOT find the pattern, but it did so there is a program error.
    raise Program_Error;
  exception
    when Pattern_Match.Pattern_Error =>
      -- We expected Find_Location to NOT find the pattern, and it did not so there is no error.
      null;
  end Expect_Pattern_Not_Found;
  if IFPLID_Pos_2 /= 1 then
    -- We expected Find_Location to NOT return a result, so there is a program error.
    raise Program_Error;
  end if;
  
  Expect_Second_Pattern_Found:
  begin
    SRC_Pos := Pattern_Match.Find_Location (Of_Context => SRC_Context,
                                            In_Text    => Text_Test1);
  exception
    when Pattern_Match.Pattern_Error =>
      -- We expected Find_Location to find the pattern, but it did not so there is a program error.
      raise Program_Error;
  end Expect_Second_Pattern_Found;
  Check_Pattern_Found (Pattern     => "-SRC ",
                       At_Location => SRC_Pos,
                       In_Text     => Text_Test1);
  
  Expect_Second_Pattern_Found_At_End:
  begin
    SRC_Pos_2 := Pattern_Match.Find_Location (Of_Context => SRC_Context,
                                              In_Text    => Text_Test2);
  exception
    when Pattern_Match.Pattern_Error =>
      -- We expected Find_Location to find the pattern, but it did not so there is a program error.
      raise Program_Error;
  end Expect_Second_Pattern_Found_At_End;
  Check_Pattern_Found (Pattern     => "-SRC ",
                       At_Location => SRC_Pos_2,
                       In_Text     => Text_Test2);

end Pattern_Match_Knuth_Morris_Pratt_Fixed_Test;

OCaml[edit]

 # let init_next p =
     let m = String.length p in
     let next = Array.make m 0 in
     let i = ref 1 and j = ref 0 in
     while !i < m - 1 do
       if p.[!i] = p.[!j] then begin incr i; incr j; next.(!i) <- !j end  else
       if !j = 0 then begin incr i; next.(!i) <- 0 end else j := next.(!j)
     done;
     next;;
 val init_next : string -> int array = <fun>

 # let kmp p =
     let next = init_next p and m = String.length p in
     fun s ->
       let n = String.length s and i = ref 0 and j = ref 0 in
       while !j < m && !i < n do
         if s.[!i] = p.[!j] then begin incr i; incr j end else
         if !j = 0 then incr i else j := next.(!j)
       done;
       if !j >= m then !i - m else raise Not_found;;
 val kmp : string -> string -> int = <fun>

Perl[edit]

 index $string, $pattern;

Ruby[edit]

def kmp_search(s, w)
  m = i = 0
  slen = s.length
  wlen = w.length
  t = kmp_table(w)
  while m+i < slen
    if w[i] == s[m+i]
      i += 1
      return m  if i == wlen
    else
      m += i - t[i]
      i = [0, t[i]].max
    end
  end
  -1
end

def kmp_table(w)
  pos = 2
  cnd = 0
  t = [-1, 0]
  wlen = w.length
  while pos < wlen
    if w[pos-1] == w[cnd]
      cnd += 1
      t[pos] = cnd
      pos += 1
    elsif cnd > 0
      cnd = t[cnd]
    else
      t[pos] = 0
      pos += 1
    end
  end
  t
end

Scheme[edit]

(define (init-next p)
  (let* ((m (string-length p))
         (next (make-vector m 0)))
    (let loop ((i 1) (j 0))
       (cond ((>= i (- m 1))
              next)
             ((char=? (string-ref p i) (string-ref p j))
              (let ((i (+ i 1))
                    (j (+ j 1)))
                 (vector-set! next i j)
                 (loop i j)))
             ((= j 0)
              (let ((i (+ i 1)))
                 (vector-set! next i 0)
                 (loop i j)))
             (else
              (loop i (vector-ref next j)))))))

(define (kmp p s)
  (let ((next (init-next p))
        (m (string-length p))
        (n (string-length s)))
    (let loop ((i 0) (j 0))
      (cond ((or (>= j m) (>= i n))
             (- i m))
            ((char=? (string-ref s i) (string-ref p j))
             (loop (+ i 1) (+ j 1)))
            ((= j 0)
             (loop (+ i 1) j))
            (else
             (loop i (vector-ref next j)))))))