Rabin-Karp Algorithm
Generated on 2025-07-10 02:08:07 Algorithm Cheatsheet for Technical Interviews
Rabin-Karp Algorithm Cheatsheet
Section titled “Rabin-Karp Algorithm Cheatsheet”1. Quick Overview
Section titled “1. Quick Overview”What it is: A string searching algorithm that uses hashing to find occurrences of a pattern string within a larger text. It efficiently reduces the number of character comparisons by comparing hash values of substrings instead of the substrings themselves.
When to use it:
- Searching for multiple patterns in a text.
- When the pattern size is relatively small compared to the text size.
- When a fast average-case performance is desired, even if worst-case is possible.
- Suitable for rolling hash implementations where the hash of the next substring can be computed from the current one in O(1) time.
2. Time & Space Complexity
Section titled “2. Time & Space Complexity”| Operation | Time Complexity | Space Complexity |
|---|---|---|
| Preprocessing | O(m) | O(1) |
| Searching | O(n) (Avg/Best) | O(1) |
| Searching | O(nm) (Worst) | O(1) |
| Overall (Avg) | O(n + m) | O(1) |
| Overall (Worst) | O(nm) | O(1) |
n: Length of the text stringm: Length of the pattern string
Worst-Case Scenario: Occurs when the hash function produces many collisions (different substrings having the same hash value). This can lead to O(nm) time complexity because we have to compare the strings character by character for each potential match.
3. Implementation
Section titled “3. Implementation”Here are examples in Python, Java, and C++:
Python:
def rabin_karp(text, pattern, prime=101): """ Finds all occurrences of pattern in text using Rabin-Karp algorithm.
Args: text: The text string to search within. pattern: The pattern string to search for. prime: A prime number used for hashing (helps reduce collisions).
Returns: A list of starting indices where the pattern is found in the text. """ n = len(text) m = len(pattern) if m > n: return []
pattern_hash = 0 text_hash = 0 h = 1 # h = pow(prime, m-1) % prime
for i in range(m - 1): h = (h * prime) # Calculate h = prime^(m-1)
# Calculate initial hash values for pattern and first substring of text for i in range(m): pattern_hash = (prime * pattern_hash + ord(pattern[i])) text_hash = (prime * text_hash + ord(text[i]))
indices = []
# Slide the pattern over text one by one for i in range(n - m + 1): # Check the hash values of current window of text and pattern if pattern_hash == text_hash: # If the hash values match, then check for actual match if pattern == text[i:i + m]: indices.append(i)
# Calculate hash value for next window of text: Remove leading digit, add trailing digit if i < n - m: text_hash = (prime * (text_hash - ord(text[i]) * h) + ord(text[i + m]))
return indices
# Example Usagetext = "ABABDABACDABABCABAB"pattern = "ABABCABAB"result = rabin_karp(text, pattern)print(f"Pattern found at indices: {result}") # Output: Pattern found at indices: [10]Java:
public class RabinKarp {
public static void rabinKarp(String text, String pattern, int prime) { int n = text.length(); int m = pattern.length(); if (m > n) { return; }
long patternHash = 0; long textHash = 0; long h = 1;
// Calculate h = prime^(m-1) for (int i = 0; i < m - 1; i++) { h = (h * prime); }
// Calculate initial hash values for pattern and first substring of text for (int i = 0; i < m; i++) { patternHash = (prime * patternHash + pattern.charAt(i)); textHash = (prime * textHash + text.charAt(i)); }
// Slide the pattern over text one by one for (int i = 0; i <= n - m; i++) { // Check the hash values of current window of text and pattern if (patternHash == textHash) { // If the hash values match, then check for actual match if (pattern.equals(text.substring(i, i + m))) { System.out.println("Pattern found at index " + i); } }
// Calculate hash value for next window of text: Remove leading digit, add trailing digit if (i < n - m) { textHash = (prime * (textHash - text.charAt(i) * h) + text.charAt(i + m)); } } }
public static void main(String[] args) { String text = "ABABDABACDABABCABAB"; String pattern = "ABABCABAB"; int prime = 101; rabinKarp(text, pattern, prime); // Output: Pattern found at index 10 }}C++:
#include <iostream>#include <string>#include <vector>
using namespace std;
vector<int> rabinKarp(const string& text, const string& pattern, int prime = 101) { int n = text.length(); int m = pattern.length(); if (m > n) { return {}; }
long long patternHash = 0; long long textHash = 0; long long h = 1;
// Calculate h = prime^(m-1) for (int i = 0; i < m - 1; i++) { h = (h * prime); }
// Calculate initial hash values for pattern and first substring of text for (int i = 0; i < m; i++) { patternHash = (prime * patternHash + pattern[i]); textHash = (prime * textHash + text[i]); }
vector<int> indices;
// Slide the pattern over text one by one for (int i = 0; i <= n - m; i++) { // Check the hash values of current window of text and pattern if (patternHash == textHash) { // If the hash values match, then check for actual match if (pattern == text.substr(i, m)) { indices.push_back(i); } }
// Calculate hash value for next window of text: Remove leading digit, add trailing digit if (i < n - m) { textHash = (prime * (textHash - text[i] * h) + text[i + m]); } }
return indices;}
int main() { string text = "ABABDABACDABABCABAB"; string pattern = "ABABCABAB"; vector<int> result = rabinKarp(text, pattern);
cout << "Pattern found at indices: "; for (int index : result) { cout << index << " "; } cout << endl; // Output: Pattern found at indices: 10 return 0;}4. Common Patterns
Section titled “4. Common Patterns”- Multiple Pattern Search: Extend the algorithm to search for multiple patterns simultaneously by storing the hash values of all patterns in a set or hash table.
- Approximate String Matching: Modify the algorithm to allow for a certain number of mismatches between the pattern and the text. This can be done by comparing the hash values and then performing a more detailed comparison only for substrings with similar hash values.
- Rolling Hash: The core technique used in Rabin-Karp. It allows for efficient calculation of the hash value of the next substring by using the hash value of the current substring. This is critical for achieving O(n) average-case time complexity.
5. Pro Tips
Section titled “5. Pro Tips”- Prime Number Selection: Choose a large prime number to minimize collisions. Larger primes reduce the likelihood of different strings hashing to the same value.
- Modulo Arithmetic: Use modulo operator (%) to prevent integer overflow, especially when dealing with large prime numbers and long strings.
- Spurious Hits: Handle spurious hits (hash match but string mismatch) efficiently. Always perform a character-by-character comparison when hash values match.
- Hash Function Choice: The quality of the hash function significantly impacts performance. A good hash function distributes strings evenly across the hash space, minimizing collisions. The polynomial hash function (used in the examples) is a common choice.
- Precomputation: Precompute
h = prime^(m-1)outside the main loop for efficiency.
6. Practice Problems
Section titled “6. Practice Problems”- LeetCode 28. Find the Index of the First Occurrence in a String: Implement
strStr()using Rabin-Karp. - LeetCode 187. Repeated DNA Sequences: Find all 10-letter-long sequences (substrings) that occur more than once in a DNA molecule. (Good for practicing rolling hash)
- Custom Problem: Given a text and a list of patterns, find all occurrences of any of the patterns in the text.
7. Templates
Section titled “7. Templates”C++ Template:
#include <iostream>#include <string>#include <vector>
using namespace std;
vector<int> rabinKarpTemplate(const string& text, const string& pattern, int prime = 101) { int n = text.length(); int m = pattern.length(); if (m > n) { return {}; }
long long patternHash = 0; long long textHash = 0; long long h = 1;
// Calculate h = prime^(m-1) for (int i = 0; i < m - 1; i++) { h = (h * prime); }
// Calculate initial hash values for pattern and first substring of text for (int i = 0; i < m; i++) { patternHash = (prime * patternHash + pattern[i]); textHash = (prime * textHash + text[i]); }
vector<int> indices;
// Slide the pattern over text one by one for (int i = 0; i <= n - m; i++) { // Check the hash values of current window of text and pattern if (patternHash == textHash) { // If the hash values match, then check for actual match if (pattern == text.substr(i, m)) { indices.push_back(i); } }
// Calculate hash value for next window of text: Remove leading digit, add trailing digit if (i < n - m) { textHash = (prime * (textHash - text[i] * h) + text[i + m]); } }
return indices;}Python Template:
def rabin_karp_template(text, pattern, prime=101): n = len(text) m = len(pattern) if m > n: return []
pattern_hash = 0 text_hash = 0 h = 1
for i in range(m - 1): h = (h * prime)
for i in range(m): pattern_hash = (prime * pattern_hash + ord(pattern[i])) text_hash = (prime * text_hash + ord(text[i]))
indices = []
for i in range(n - m + 1): if pattern_hash == text_hash: if pattern == text[i:i + m]: indices.append(i)
if i < n - m: text_hash = (prime * (text_hash - ord(text[i]) * h) + ord(text[i + m]))
return indicesJava Template:
public class RabinKarpTemplate {
public static void rabinKarpTemplate(String text, String pattern, int prime) { int n = text.length(); int m = pattern.length(); if (m > n) { return; }
long patternHash = 0; long textHash = 0; long h = 1;
for (int i = 0; i < m - 1; i++) { h = (h * prime); }
for (int i = 0; i < m; i++) { patternHash = (prime * patternHash + pattern.charAt(i)); textHash = (prime * textHash + text.charAt(i)); }
for (int i = 0; i <= n - m; i++) { if (patternHash == textHash) { if (pattern.equals(text.substring(i, i + m))) { System.out.println("Pattern found at index " + i); } }
if (i < n - m) { textHash = (prime * (textHash - text.charAt(i) * h) + text.charAt(i + m)); } } }}