Skip to content

KMP String Matching

Generated on 2025-07-10 02:07:45 Algorithm Cheatsheet for Technical Interviews


What is it? The Knuth-Morris-Pratt (KMP) algorithm is an efficient string searching algorithm that finds occurrences of a “pattern” string within a larger “text” string. It avoids redundant comparisons by utilizing a precomputed “longest proper prefix suffix” (LPS) array for the pattern.

When to use it? Use KMP when you need to find all occurrences of a pattern within a text string and performance is critical. It’s especially useful when the pattern contains repeating substrings. It is particularly efficient for long texts and patterns.

OperationTime ComplexitySpace Complexity
LPS ComputationO(m)O(m)
String MatchingO(n)O(1)
TotalO(n + m)O(m)

Where:

  • n is the length of the text string.
  • m is the length of the pattern string.
def compute_lps(pattern):
"""Computes the LPS (Longest Proper Prefix Suffix) array for the pattern."""
m = len(pattern)
lps = [0] * m
length = 0 # Length of the previous longest prefix suffix
i = 1
while i < m:
if pattern[i] == pattern[length]:
length += 1
lps[i] = length
i += 1
else:
if length != 0:
length = lps[length - 1] # This is tricky. Consider 'AAACAAAA'
else:
lps[i] = 0
i += 1
return lps
def kmp_search(text, pattern):
"""Searches for the pattern in the text using the KMP algorithm."""
n = len(text)
m = len(pattern)
lps = compute_lps(pattern)
i = 0 # index for text
j = 0 # index for pattern
occurrences = []
while i < n:
if pattern[j] == text[i]:
i += 1
j += 1
if j == m:
occurrences.append(i - j) # Pattern found at index i-j
j = lps[j - 1] # Move to the next possible match
# Mismatch after j matches
elif i < n and pattern[j] != text[i]:
if j != 0:
j = lps[j - 1]
else:
i += 1
return occurrences
# Example Usage
text = "ABABDABACDABABCABAB"
pattern = "ABABCABAB"
occurrences = kmp_search(text, pattern)
print(f"Pattern found at indices: {occurrences}") # Pattern found at indices: [10]
public class KMP {
public static int[] computeLPSArray(String pattern) {
int m = pattern.length();
int[] lps = new int[m];
int length = 0; // Length of the previous longest prefix suffix
int i = 1;
while (i < m) {
if (pattern.charAt(i) == pattern.charAt(length)) {
length++;
lps[i] = length;
i++;
} else {
if (length != 0) {
length = lps[length - 1]; // This is tricky. Consider 'AAACAAAA'
} else {
lps[i] = 0;
i++;
}
}
}
return lps;
}
public static int[] kmpSearch(String text, String pattern) {
int n = text.length();
int m = pattern.length();
int[] lps = computeLPSArray(pattern);
int i = 0; // index for text
int j = 0; // index for pattern
ArrayList<Integer> occurrencesList = new ArrayList<>();
while (i < n) {
if (pattern.charAt(j) == text.charAt(i)) {
i++;
j++;
}
if (j == m) {
occurrencesList.add(i - j); // Pattern found at index i-j
j = lps[j - 1]; // Move to the next possible match
}
// Mismatch after j matches
else if (i < n && pattern.charAt(j) != text.charAt(i)) {
if (j != 0) {
j = lps[j - 1];
} else {
i++;
}
}
}
return occurrencesList.stream().mapToInt(Integer::intValue).toArray();
}
public static void main(String[] args) {
String text = "ABABDABACDABABCABAB";
String pattern = "ABABCABAB";
int[] occurrences = kmpSearch(text, pattern);
System.out.println("Pattern found at indices: " + Arrays.toString(occurrences)); // Pattern found at indices: [10]
}
}
#include <iostream>
#include <vector>
using namespace std;
vector<int> computeLPSArray(const string& pattern) {
int m = pattern.length();
vector<int> lps(m, 0);
int length = 0; // Length of the previous longest prefix suffix
int i = 1;
while (i < m) {
if (pattern[i] == pattern[length]) {
length++;
lps[i] = length;
i++;
} else {
if (length != 0) {
length = lps[length - 1]; // This is tricky. Consider 'AAACAAAA'
} else {
lps[i] = 0;
i++;
}
}
}
return lps;
}
vector<int> kmpSearch(const string& text, const string& pattern) {
int n = text.length();
int m = pattern.length();
vector<int> lps = computeLPSArray(pattern);
int i = 0; // index for text
int j = 0; // index for pattern
vector<int> occurrences;
while (i < n) {
if (pattern[j] == text[i]) {
i++;
j++;
}
if (j == m) {
occurrences.push_back(i - j); // Pattern found at index i-j
j = lps[j - 1]; // Move to the next possible match
}
// Mismatch after j matches
else if (i < n && pattern[j] != text[i]) {
if (j != 0) {
j = lps[j - 1];
} else {
i++;
}
}
}
return occurrences;
}
int main() {
string text = "ABABDABACDABABCABAB";
string pattern = "ABABCABAB";
vector<int> occurrences = kmpSearch(text, pattern);
cout << "Pattern found at indices: ";
for (int index : occurrences) {
cout << index << " ";
}
cout << endl; // Pattern found at indices: 10
return 0;
}
  • Exact String Matching: The basic use case, as shown in the examples.
  • Finding All Occurrences: The provided implementations return all starting indices where the pattern is found.
  • Pattern Counting: Modify the kmpSearch function to simply increment a counter each time the pattern is found instead of storing the index.
  • Substring Search: Used in text editors, search engines, and bioinformatics.
  • Multiple Pattern Search: While KMP is designed for a single pattern, you can iterate through multiple patterns, applying KMP for each. For a truly efficient multiple pattern search, consider algorithms like Aho-Corasick.
  • Precomputation is Key: The LPS array computation is done once for the pattern. This amortizes the cost, making KMP very efficient for repeated searches with the same pattern.
  • Understanding the LPS Array: The lps[i] value represents the length of the longest proper prefix of pattern[0...i] which is also a suffix of pattern[0...i]. This is the heart of the KMP optimization.
  • Edge Cases: Handle empty text or pattern strings correctly. Returning an empty list or appropriate error code is usually best.
  • Optimization: In some cases, if the alphabet is small, you might be able to optimize the LPS computation using a lookup table.
  • Avoid: Naive string searching (O(n*m)) is often sufficient for small inputs. Don’t over-engineer if not needed.
  1. LeetCode 28. Implement strStr(): Implement strStr() which is the string search function. Return the index of the first occurrence of needle in haystack, or -1 if needle is not part of haystack. (This is a classic KMP application).

  2. LeetCode 459. Repeated Substring Pattern: Given a string s, check if it can be constructed by taking a substring of it and appending multiple copies of the substring together. (KMP can be used to find the longest repeated prefix suffix).

  3. Check if a string is a rotation of another string Given two strings s1 and s2, write a function to check if s2 is a rotation of s1. (using KMP to check if s1 is substring of s2+s2)

These templates are designed for easy copy-pasting and modification. They provide the basic structure for KMP and LPS computation.

#include <iostream>
#include <vector>
using namespace std;
vector<int> computeLPSArray(const string& pattern) {
int m = pattern.length();
vector<int> lps(m, 0);
// Implement LPS computation here
return lps;
}
vector<int> kmpSearch(const string& text, const string& pattern) {
int n = text.length();
int m = pattern.length();
vector<int> lps = computeLPSArray(pattern);
vector<int> occurrences;
// Implement KMP search here
return occurrences;
}
int main() {
string text = "YOUR_TEXT";
string pattern = "YOUR_PATTERN";
vector<int> occurrences = kmpSearch(text, pattern);
cout << "Pattern found at indices: ";
for (int index : occurrences) {
cout << index << " ";
}
cout << endl;
return 0;
}
def compute_lps(pattern):
"""Computes the LPS (Longest Proper Prefix Suffix) array for the pattern."""
m = len(pattern)
lps = [0] * m
# Implement LPS computation here
return lps
def kmp_search(text, pattern):
"""Searches for the pattern in the text using the KMP algorithm."""
n = len(text)
m = len(pattern)
lps = compute_lps(pattern)
occurrences = []
# Implement KMP search here
return occurrences
# Example Usage
text = "YOUR_TEXT"
pattern = "YOUR_PATTERN"
occurrences = kmp_search(text, pattern)
print(f"Pattern found at indices: {occurrences}")
import java.util.ArrayList;
import java.util.Arrays;
public class KMPTemplate {
public static int[] computeLPSArray(String pattern) {
int m = pattern.length();
int[] lps = new int[m];
// Implement LPS computation here
return lps;
}
public static int[] kmpSearch(String text, String pattern) {
int n = text.length();
int m = pattern.length();
int[] lps = computeLPSArray(pattern);
ArrayList<Integer> occurrencesList = new ArrayList<>();
// Implement KMP search here
return occurrencesList.stream().mapToInt(Integer::intValue).toArray();
}
public static void main(String[] args) {
String text = "YOUR_TEXT";
String pattern = "YOUR_PATTERN";
int[] occurrences = kmpSearch(text, pattern);
System.out.println("Pattern found at indices: " + Arrays.toString(occurrences));
}
}