# The Knuth-Morris-Pratt Algorithm in Perl and using index() to match a string pattern.

In light of my recent post using a brute-force string searching algorithm, I decided to post an implementation of the Knuth-Morris-Pratt algorithm in Perl. This implementation is essentially straight from the pseudo-code found on the wiki.

``````
#!/usr/bin/perl

use strict;
use warnings;
use Data::Dumper;

my \$string = "ABC ABCDAB ABCDABCDABDE";

my \$pattern = "ABCDABD";
my \$table_array = build_table(\$pattern);

print "found pattern at index: "
. search_string( \$string, \$pattern, \$table_array ),"\n";

sub search_string {

my \$string      = shift;
my \$pattern     = shift;
my \$table_array = shift;

my \$m = 0; # beginning of current match
my \$i = 0; # the position of the current character in pattern sought

my @split_string = split(//, \$string);
my @split_pattern = split(//, \$pattern);

while ( \$m + \$i < scalar(@split_string) ) {
if ( \$split_pattern[\$i] eq \$split_string[ \$m + \$i] ) {
if ( \$i == scalar(@split_pattern) - 1 ) {
return \$m;
}
\$i++;
}
else {
if ( @{\$table_array}[\$i] > -1 ) {
\$m = \$m + \$i - @{\$table_array}[\$i];
\$i = @{\$table_array}[\$i];
}
else {
\$i = 0;
\$m++;
}
}

}

return length(\$string);

}

sub build_table {

my \$string = shift;

my @split_string = split (//, \$string);

my \$pos = 2;
my \$cnd = 0;

@{\$table_array} = -1;
@{\$table_array} = 0;

while ( \$pos < scalar(@split_string) ) {
if ( \$split_string[\$pos-1] eq \$split_string[\$cnd] ) {
\$cnd++;
@{\$table_array}[\$pos] = \$cnd;
\$pos++;
}
elsif ( \$cnd > 0 ) {
\$cnd = @{\$table_array}[\$cnd];
}
else {
@{\$table_array}[\$pos] = 0;
\$pos++;
}

}

return \$table_array;
}
``````

It should be noted, however, that the index() function in Perl uses the Boyer-Moore algorithm under the hood- so implementing a string searching function like the following may be an easier, and faster solution. It takes a pattern and string (to search within) as arguments.

``````
use strict;
use warnings;
use Data::Dumper;

my \$matches = occurrences('CGATGGTCG',
'TCGATGGTAAATACTGTGCGATGGTCGATGGTTCGATGGTCGATGGTCGGGACGATGGTGGGCGATGGTGCGATGGTTCGATGGTACGATGGTCGATGGTACGATGGTCAGGGCGATGGTTAACGCGATGGTGGCAGTCGATGGTTGCGATGGTTCGATGGTCCCGATGGTGCGACGATGGTATTCCGATGGTTCGATGGTCGATGGTACTGCGATGGTCGATGGTACATCGATGGTATCCGATGGTCGATGGTGGCGATGGTCGATGGTCGATGGTCGATGGTGTTATCGATGGTCCGATGGTCGATGGTTAGCGATGGTTATAGGTATCCCGATGGTCGATGGTCGATGGTTACGATGGTCCGATGGTCGATGGTCTTTGTCGATGGTTCGATGGTCGATGGTAACGATGGTCGATGGTTTGTCGATGGTCGCGATGGTCGCCGATGGTGCCGATGGTGGGTCGATGGTGCTCGATGGTCGATGGTCCGCGATGGTTGCGTCGATGGTCGATGGTCGATGGTGGACTCGATGGTCACGATGGTTTCTCGATGGTGGTTCCGATGGTCGATGGTGTCGATGGTACGCAAGTACAGATAGTGCGATGGTGAGGATAGTGCGATGGTAGCGATGGTCGCGATGGTCGATGGTTACTTGCCTGCGATGGTGTGTACGATGGTCGGAACGCCCGATGGTGACGATGGTCATGCGATGGTATTCAATTCGATGGTCTCCGGCCGAAGAAAGCGATGGTCCCAAGATGATCGATGGTCGATGGTCGATGGTGTCGATGGTCCGATGGTCCGTTTCGATGGTACTTCGATGGTTTGCGATGGTATATGTCGATGGTCCAACGATGGTGGTGCGATGGTCTGCGATGGTA');

print join(' ', @{\$matches});
sub occurrences {

my( \$x, \$y ) = @_;

my \$pos = 0;
my \$matches = 0;
my @locations;

while (1) {
\$pos = index(\$y, \$x, \$pos);
last if(\$pos < 0);
\$matches++;
\$pos++;
push @locations, \$pos;
}

return \@locations;
}
``````