KMP String Matching
Generated on 2025-07-10 02:07:45 Algorithm Cheatsheet for Technical Interviews
KMP String Matching Cheatsheet
Section titled “KMP String Matching Cheatsheet”1. Quick Overview
Section titled “1. Quick Overview”What is it? The Knuth-Morris-Pratt (KMP) algorithm is an efficient string searching algorithm that finds occurrences of a “pattern” string within a larger “text” string. It avoids redundant comparisons by utilizing a precomputed “longest proper prefix suffix” (LPS) array for the pattern.
When to use it? Use KMP when you need to find all occurrences of a pattern within a text string and performance is critical. It’s especially useful when the pattern contains repeating substrings. It is particularly efficient for long texts and patterns.
2. Time & Space Complexity
Section titled “2. Time & Space Complexity”| Operation | Time Complexity | Space Complexity |
|---|---|---|
| LPS Computation | O(m) | O(m) |
| String Matching | O(n) | O(1) |
| Total | O(n + m) | O(m) |
Where:
nis the length of the text string.mis the length of the pattern string.
3. Implementation
Section titled “3. Implementation”Python
Section titled “Python”def compute_lps(pattern): """Computes the LPS (Longest Proper Prefix Suffix) array for the pattern.""" m = len(pattern) lps = [0] * m length = 0 # Length of the previous longest prefix suffix i = 1
while i < m: if pattern[i] == pattern[length]: length += 1 lps[i] = length i += 1 else: if length != 0: length = lps[length - 1] # This is tricky. Consider 'AAACAAAA' else: lps[i] = 0 i += 1 return lps
def kmp_search(text, pattern): """Searches for the pattern in the text using the KMP algorithm.""" n = len(text) m = len(pattern) lps = compute_lps(pattern) i = 0 # index for text j = 0 # index for pattern occurrences = []
while i < n: if pattern[j] == text[i]: i += 1 j += 1
if j == m: occurrences.append(i - j) # Pattern found at index i-j j = lps[j - 1] # Move to the next possible match
# Mismatch after j matches elif i < n and pattern[j] != text[i]: if j != 0: j = lps[j - 1] else: i += 1
return occurrences
# Example Usagetext = "ABABDABACDABABCABAB"pattern = "ABABCABAB"occurrences = kmp_search(text, pattern)print(f"Pattern found at indices: {occurrences}") # Pattern found at indices: [10]public class KMP {
public static int[] computeLPSArray(String pattern) { int m = pattern.length(); int[] lps = new int[m]; int length = 0; // Length of the previous longest prefix suffix int i = 1;
while (i < m) { if (pattern.charAt(i) == pattern.charAt(length)) { length++; lps[i] = length; i++; } else { if (length != 0) { length = lps[length - 1]; // This is tricky. Consider 'AAACAAAA' } else { lps[i] = 0; i++; } } } return lps; }
public static int[] kmpSearch(String text, String pattern) { int n = text.length(); int m = pattern.length(); int[] lps = computeLPSArray(pattern); int i = 0; // index for text int j = 0; // index for pattern ArrayList<Integer> occurrencesList = new ArrayList<>();
while (i < n) { if (pattern.charAt(j) == text.charAt(i)) { i++; j++; }
if (j == m) { occurrencesList.add(i - j); // Pattern found at index i-j j = lps[j - 1]; // Move to the next possible match }
// Mismatch after j matches else if (i < n && pattern.charAt(j) != text.charAt(i)) { if (j != 0) { j = lps[j - 1]; } else { i++; } } }
return occurrencesList.stream().mapToInt(Integer::intValue).toArray();
}
public static void main(String[] args) { String text = "ABABDABACDABABCABAB"; String pattern = "ABABCABAB"; int[] occurrences = kmpSearch(text, pattern); System.out.println("Pattern found at indices: " + Arrays.toString(occurrences)); // Pattern found at indices: [10] }}#include <iostream>#include <vector>
using namespace std;
vector<int> computeLPSArray(const string& pattern) { int m = pattern.length(); vector<int> lps(m, 0); int length = 0; // Length of the previous longest prefix suffix int i = 1;
while (i < m) { if (pattern[i] == pattern[length]) { length++; lps[i] = length; i++; } else { if (length != 0) { length = lps[length - 1]; // This is tricky. Consider 'AAACAAAA' } else { lps[i] = 0; i++; } } } return lps;}
vector<int> kmpSearch(const string& text, const string& pattern) { int n = text.length(); int m = pattern.length(); vector<int> lps = computeLPSArray(pattern); int i = 0; // index for text int j = 0; // index for pattern vector<int> occurrences;
while (i < n) { if (pattern[j] == text[i]) { i++; j++; }
if (j == m) { occurrences.push_back(i - j); // Pattern found at index i-j j = lps[j - 1]; // Move to the next possible match }
// Mismatch after j matches else if (i < n && pattern[j] != text[i]) { if (j != 0) { j = lps[j - 1]; } else { i++; } } }
return occurrences;}
int main() { string text = "ABABDABACDABABCABAB"; string pattern = "ABABCABAB"; vector<int> occurrences = kmpSearch(text, pattern);
cout << "Pattern found at indices: "; for (int index : occurrences) { cout << index << " "; } cout << endl; // Pattern found at indices: 10 return 0;}4. Common Patterns
Section titled “4. Common Patterns”- Exact String Matching: The basic use case, as shown in the examples.
- Finding All Occurrences: The provided implementations return all starting indices where the pattern is found.
- Pattern Counting: Modify the
kmpSearchfunction to simply increment a counter each time the pattern is found instead of storing the index. - Substring Search: Used in text editors, search engines, and bioinformatics.
- Multiple Pattern Search: While KMP is designed for a single pattern, you can iterate through multiple patterns, applying KMP for each. For a truly efficient multiple pattern search, consider algorithms like Aho-Corasick.
5. Pro Tips
Section titled “5. Pro Tips”- Precomputation is Key: The LPS array computation is done once for the pattern. This amortizes the cost, making KMP very efficient for repeated searches with the same pattern.
- Understanding the LPS Array: The
lps[i]value represents the length of the longest proper prefix ofpattern[0...i]which is also a suffix ofpattern[0...i]. This is the heart of the KMP optimization. - Edge Cases: Handle empty text or pattern strings correctly. Returning an empty list or appropriate error code is usually best.
- Optimization: In some cases, if the alphabet is small, you might be able to optimize the LPS computation using a lookup table.
- Avoid: Naive string searching (O(n*m)) is often sufficient for small inputs. Don’t over-engineer if not needed.
6. Practice Problems
Section titled “6. Practice Problems”-
LeetCode 28. Implement strStr(): Implement
strStr()which is the string search function. Return the index of the first occurrence of needle in haystack, or -1 if needle is not part of haystack. (This is a classic KMP application). -
LeetCode 459. Repeated Substring Pattern: Given a string
s, check if it can be constructed by taking a substring of it and appending multiple copies of the substring together. (KMP can be used to find the longest repeated prefix suffix). -
Check if a string is a rotation of another string Given two strings s1 and s2, write a function to check if s2 is a rotation of s1. (using KMP to check if s1 is substring of s2+s2)
7. Templates
Section titled “7. Templates”These templates are designed for easy copy-pasting and modification. They provide the basic structure for KMP and LPS computation.
C++ Template
Section titled “C++ Template”#include <iostream>#include <vector>
using namespace std;
vector<int> computeLPSArray(const string& pattern) { int m = pattern.length(); vector<int> lps(m, 0); // Implement LPS computation here return lps;}
vector<int> kmpSearch(const string& text, const string& pattern) { int n = text.length(); int m = pattern.length(); vector<int> lps = computeLPSArray(pattern); vector<int> occurrences; // Implement KMP search here return occurrences;}
int main() { string text = "YOUR_TEXT"; string pattern = "YOUR_PATTERN"; vector<int> occurrences = kmpSearch(text, pattern);
cout << "Pattern found at indices: "; for (int index : occurrences) { cout << index << " "; } cout << endl; return 0;}Python Template
Section titled “Python Template”def compute_lps(pattern): """Computes the LPS (Longest Proper Prefix Suffix) array for the pattern.""" m = len(pattern) lps = [0] * m # Implement LPS computation here return lps
def kmp_search(text, pattern): """Searches for the pattern in the text using the KMP algorithm.""" n = len(text) m = len(pattern) lps = compute_lps(pattern) occurrences = [] # Implement KMP search here return occurrences
# Example Usagetext = "YOUR_TEXT"pattern = "YOUR_PATTERN"occurrences = kmp_search(text, pattern)print(f"Pattern found at indices: {occurrences}")Java Template
Section titled “Java Template”import java.util.ArrayList;import java.util.Arrays;
public class KMPTemplate {
public static int[] computeLPSArray(String pattern) { int m = pattern.length(); int[] lps = new int[m]; // Implement LPS computation here return lps; }
public static int[] kmpSearch(String text, String pattern) { int n = text.length(); int m = pattern.length(); int[] lps = computeLPSArray(pattern); ArrayList<Integer> occurrencesList = new ArrayList<>(); // Implement KMP search here return occurrencesList.stream().mapToInt(Integer::intValue).toArray(); }
public static void main(String[] args) { String text = "YOUR_TEXT"; String pattern = "YOUR_PATTERN"; int[] occurrences = kmpSearch(text, pattern); System.out.println("Pattern found at indices: " + Arrays.toString(occurrences)); }}