Difference between revisions of "Knuth-Morris-Pratt Algorithm Examples"

From CodeCodex

(OCaml)
Line 1: Line 1:
The Knuth-Morris-Pratt algorithm for string matching runs in O(''m'' + ''n'') time, where ''m'' is the length of the pattern and ''n'' is the length of the text to be searched. Compare this with the brute-force search time of O(''mn'') or the Boyer-Moore algorithm worst-case time of O(''m'' + ''n'').
+
The Knuth-Morris-Pratt algorithm for string matching runs in O(''m''   ''n'') time, where ''m'' is the length of the pattern and ''n'' is the length of the text to be searched. Compare this with the brute-force search time of O(''mn'') or the Boyer-Moore algorithm worst-case time of O(''m''   ''n'').
  
 
==Implementations==
 
==Implementations==
Line 16: Line 16:
 
   
 
   
 
     /* Construct the lookup table */
 
     /* Construct the lookup table */
     T = malloc((strlen(pattern)+1) * sizeof *T);
+
     T = malloc((strlen(pattern) 1) * sizeof *T);
 
     T[0] = -1;
 
     T[0] = -1;
     for (i=0; pattern[i] != '\0'; i++) {
+
     for (i=0; pattern[i] != '\0'; i ) {
         T[i+1] = T[i] + 1;
+
         T[i 1] = T[i]   1;
         while (T[i+1] > 0 && pattern[i] != pattern[T[i+1]-1])
+
         while (T[i 1] > 0
          T[i+1] = T[T[i+1]-1] + 1;
+
    }
+
+
    /* Perform the search */
+
    for (i=j=0; text[i] != '\0'; ) {
+
        if (j < 0 || text[i] == pattern[j]) {
+
            ++i, ++j;
+
            if (pattern[j] == '\0') {
+
                result = text+i-j;
+
                break;
+
            }
+
        }
+
        else j = T[j];
+
    }
+
+
    free(T);
+
    return result;
+
}
+
 
+
===Ada (83 and 95)===
+
<pre>
+
-- pattern_match_knuth_morris_pratt_test.adb an implementation for fixed strings
+
 
+
-- Written by Wikibob, 2004, from notes on the Knuth_Morris_Pratt pattern match algorithm
+
-- adapted to fixed strings of characters.
+
-- It is in the public domain.
+
 
+
-- If you are using GNAT, use gnatmake to compile and link this program.
+
-- To use the pattern match functions in your own software extract
+
-- the inner package's specification and body into separate files.
+
 
+
-- This program is self-contained and demonstrates a particular
+
-- implementation of the Knuth_Morris_Pratt algorithm applied to
+
-- fixed strings, with the following restrictions:
+
-- * the search pattern is limited to a maximum of 256 characters
+
-- * the caller must first call the function Pre_Compute on a pattern
+
--  to obtain a context variable containing the pre-computed pattern.
+
--  There is no limit to the number of contexts.
+
-- * the caller must handle the exception Pattern_Error that will
+
--  be raised if function Find_Location was unable to find the
+
--  pattern in the given string.
+
 
+
-- Suggested improvements to the inner package are:
+
-- * add type Result_T is record Location : Index; Found : Boolean; end record;
+
--  and use it instead of raising Pattern_Error.
+
-- * produce a version that dispenses with the Context and has Find_Location
+
--  perform the Pre_process internally.
+
 
+
-- References: http://ww0.java3.datastructures.net/handouts/PatternMatching.pdf
+
 
+
procedure Pattern_Match_Knuth_Morris_Pratt_Fixed_Test is
+
  -- You may extract this spec into file pattern_match.ads
+
  package Pattern_Match is
+
    Max_Pattern_Length : constant Positive := 256;
+
    type Context is private;
+
    function Pre_Compute (Pattern : in String) return Context;
+
    -- precomputes the table of skips for the Pattern.
+
    function Find_Location (Of_Context : in Context;
+
                            In_Text    : in String) return Positive;
+
    Pattern_Error : exception;
+
    -- alternative is return Natural and use 0 to mean not found.
+
  private
+
    subtype Pattern_Length_T is Positive range 1..Max_Pattern_Length;
+
    type Failure_Function_T is array (Pattern_Length_T) of Positive;
+
    subtype Slided_Pattern_T is String (1 .. Max_Pattern_Length);
+
    type Context is record
+
      Failure_Function : Failure_Function_T;
+
      M_Pattern        : Slided_Pattern_T;
+
      Pattern_Length  : Positive;
+
    end record;
+
  end Pattern_Match;
+
 
+
  -- Variables and data for testing.
+
  IFPLID_Context : Pattern_Match.Context;
+
  SRC_Context    : Pattern_Match.Context;
+
  Text_Test1 : constant String := "IMCHG DLH5877 -BEGIN ADDR -IFPLID AT05428113 -SRC FPL -RFL F330";
+
  Text_Test2 : constant String := "IMCHG DLH5877 EDDKCLHD -BEGIN ADDR -FAC CFMUTACT AA05428113 FPL -STAR WLD5M -SRC ";
+
  IFPLID_Pos  : Positive;
+
  IFPLID_Pos_2 : Positive := 1;
+
  SRC_Pos      : Positive;
+
  SRC_Pos_2    : Positive;
+
 
+
  -- You may extract this spec into file pattern_match.adb
+
  package body Pattern_Match is
+
    function Pre_Compute (Pattern : in String) return Context is
+
      I, J : Positive;
+
      Pattern_Context : Context;
+
    begin
+
      if Pattern = "" then
+
        raise Pattern_Error;
+
      end if;
+
      Pattern_Context.M_Pattern (1..Pattern'Length) := Pattern;
+
      Pattern_Context.Pattern_Length := Pattern'Length;
+
      Pattern_Context.Failure_Function (1) := 1;
+
      I := 2;
+
      J := 1;
+
      while I <= Pattern_Context.Pattern_Length loop
+
        if Pattern (I) = Pattern (J) then
+
          -- we have matched J + 1 chars.
+
          Pattern_Context.Failure_Function (I) := J + 1;
+
          I := I + 1;
+
          J := J + 1;
+
        elsif J > 1 then
+
          -- use failure function to shift Pattern
+
          J := Pattern_Context.Failure_Function (J - 1);
+
        else
+
          Pattern_Context.Failure_Function (I) := 1;
+
          I := I + 1;
+
        end if;
+
      end loop;
+
      return Pattern_Context;
+
    end Pre_Compute;
+
   
+
    function Find_Location (Of_Context : in Context;
+
                            In_Text    : in String) return Positive is
+
      subtype Slided_Text_T is String (1 .. In_Text'Length);
+
      Slided_Text : constant Slided_Text_T := Slided_Text_T (In_Text);
+
      I, J : Positive;
+
    begin
+
      I := 1;
+
      J := 1;
+
      while I <= Slided_Text'Last loop
+
        if Slided_Text (I) = Of_Context.M_Pattern (J) then
+
          if J = Of_Context.Pattern_Length then
+
            return I - J + 1;
+
          else
+
            I := I + 1;
+
            J := J + 1;
+
          end if;
+
        elsif J > 1 then
+
          J := Of_Context.Failure_Function (J - 1);
+
        else
+
          I := I + 1;
+
        end if;
+
      end loop;
+
      raise Pattern_Error;
+
      -- Or change function to return Natural and return 0.
+
    end Find_Location;
+
  end Pattern_Match;
+
 
+
  -- You may extract the rest of this file into file pattern_match_test.adb
+
  -- and modify accordingly.
+
 
+
  procedure Check_Pattern_Found (Pattern    : in String;
+
                                At_Location : in Positive;
+
                                In_Text    : in String) is
+
    subtype Slided_Text_T is String (1 .. Pattern'Length);
+
    Slided_Pattern : constant Slided_Text_T := Slided_Text_T (Pattern);
+
  begin
+
    if At_Location > In_Text'Last or else
+
      At_Location + Pattern'Length - 1 > In_Text'Last or else
+
      Slided_Text_T (In_Text (At_Location .. At_Location + Pattern'Length - 1)) /= Slided_Pattern
+
    then
+
      -- We expected Find_Location to return the location of the pattern, as it did not there is a program error.
+
      raise Program_Error;
+
    end if;
+
  end Check_Pattern_Found;
+
 
+
begin
+
  IFPLID_Context := Pattern_Match.Pre_Compute ("-IFPLID ");
+
  SRC_Context    := Pattern_Match.Pre_Compute ("-SRC ");
+
 
+
  Expect_Pattern_Found:
+
  begin
+
    IFPLID_Pos := Pattern_Match.Find_Location (Of_Context => IFPLID_Context,
+
                                              In_Text    => Text_Test1);
+
  exception
+
    when Pattern_Match.Pattern_Error =>
+
      -- We expected Find_Location to find the pattern, but it did not so there is a program error.
+
      raise Program_Error;
+
  end Expect_Pattern_Found;
+
  Check_Pattern_Found (Pattern    => "-IFPLID ",
+
                      At_Location => IFPLID_Pos,
+
                      In_Text    => Text_Test1);
+
   
+
  Expect_Pattern_Not_Found:
+
  begin
+
    IFPLID_Pos_2 := Pattern_Match.Find_Location (Of_Context => IFPLID_Context,
+
                                                In_Text    => Text_Test2);
+
    -- We expected Find_Location to NOT find the pattern, but it did so there is a program error.
+
    raise Program_Error;
+
  exception
+
    when Pattern_Match.Pattern_Error =>
+
      -- We expected Find_Location to NOT find the pattern, and it did not so there is no error.
+
      null;
+
  end Expect_Pattern_Not_Found;
+
  if IFPLID_Pos_2 /= 1 then
+
    -- We expected Find_Location to NOT return a result, so there is a program error.
+
    raise Program_Error;
+
  end if;
+
 
+
  Expect_Second_Pattern_Found:
+
  begin
+
    SRC_Pos := Pattern_Match.Find_Location (Of_Context => SRC_Context,
+
                                            In_Text    => Text_Test1);
+
  exception
+
    when Pattern_Match.Pattern_Error =>
+
      -- We expected Find_Location to find the pattern, but it did not so there is a program error.
+
      raise Program_Error;
+
  end Expect_Second_Pattern_Found;
+
  Check_Pattern_Found (Pattern    => "-SRC ",
+
                      At_Location => SRC_Pos,
+
                      In_Text    => Text_Test1);
+
 
+
  Expect_Second_Pattern_Found_At_End:
+
  begin
+
    SRC_Pos_2 := Pattern_Match.Find_Location (Of_Context => SRC_Context,
+
                                              In_Text    => Text_Test2);
+
  exception
+
    when Pattern_Match.Pattern_Error =>
+
      -- We expected Find_Location to find the pattern, but it did not so there is a program error.
+
      raise Program_Error;
+
  end Expect_Second_Pattern_Found_At_End;
+
  Check_Pattern_Found (Pattern    => "-SRC ",
+
                      At_Location => SRC_Pos_2,
+
                      In_Text    => Text_Test2);
+
 
+
end Pattern_Match_Knuth_Morris_Pratt_Fixed_Test;
+
</pre>
+
 
+
===OCaml===
+
# let init_next p =
+
    let m = String.length p in
+
    let next = Array.create m 0 in
+
    let i = ref 1 and j = ref 0 in
+
    while !i < m - 1 do
+
      if p.[!i] = p.[!j] then begin incr i; incr j; next.(!i) <- !j end  else
+
      if !j = 0 then begin incr i; next.(!i) <- 0 end else j := next.(!j)
+
    done;
+
    next;;
+
val init_next : string -> int array = <fun>
+
 
+
# let kmp p =
+
    let next = init_next p and m = String.length p in
+
    function s ->
+
      let n = String.length s and i = ref 0 and j = ref 0 in
+
      while !j < m && !i < n do
+
        if s.[!i] = p.[!j] then begin incr i; incr j end else
+
        if !j = 0 then incr i else j := next.(!j)
+
      done;
+
      if !j >= m then !i - m else raise Not_found;;
+
val kmp : string -> string -> int = <fun>
+
 
+
[[Category:C]]
+
[[Category:Ada]]
+
[[Category:String matching]]
+
[[Category:Objective_Caml]]
+

Revision as of 22:56, 11 April 2007

The Knuth-Morris-Pratt algorithm for string matching runs in O(m n) time, where m is the length of the pattern and n is the length of the text to be searched. Compare this with the brute-force search time of O(mn) or the Boyer-Moore algorithm worst-case time of O(m n).

Implementations

C

#include <stdio.h>
#include <stdlib.h>

const char *kmp_search(const char *text, const char *pattern)
{
    int *T;
    int i, j;
    const char *result = NULL;

    if (pattern[0] == '\0')
      return text;

    /* Construct the lookup table */
    T = malloc((strlen(pattern) 1) * sizeof *T);
    T[0] = -1;
    for (i=0; pattern[i] != '\0'; i  ) {
        T[i 1] = T[i]   1;
        while (T[i 1] > 0