Hash Table Implementation
Generated on 2025-07-10 02:09:15 Algorithm Cheatsheet for Technical Interviews
Hash Table Implementation Cheatsheet
Section titled “Hash Table Implementation Cheatsheet”1. Quick Overview
Section titled “1. Quick Overview”What is it?
A hash table (also known as a hash map) is a data structure that implements an associative array abstract data type, which can map keys to values. It uses a hash function to compute an index into an array of buckets or slots, from which the desired value can be found.
When to Use it?
- When you need fast average-case lookup, insertion, and deletion operations (O(1) on average).
- When you need to implement associative arrays or dictionaries.
- When the order of elements doesn’t matter.
- For caching, symbol tables, and database indexing.
2. Time & Space Complexity
Section titled “2. Time & Space Complexity”| Operation | Average Case | Worst Case |
|---|---|---|
| Insertion | O(1) | O(n) |
| Deletion | O(1) | O(n) |
| Lookup | O(1) | O(n) |
| Space | O(n) | O(n) |
- Worst Case: Occurs when all keys hash to the same index (collisions). This degrades performance to O(n) for lookup, insertion, and deletion.
- Average Case: Assumes a good hash function that distributes keys evenly across the table.
3. Implementation
Section titled “3. Implementation”Here are implementations in Python, Java, and C++, demonstrating separate chaining (collision resolution):
Python
class HashTable: def __init__(self, capacity=10): self.capacity = capacity self.table = [[] for _ in range(capacity)] # List of lists (separate chaining) self.size = 0
def __len__(self): return self.size
def __contains__(self, key): index = self._hash(key) % self.capacity for k, _ in self.table[index]: # Iterate through the chain if k == key: return True return False
def insert(self, key, value): index = self._hash(key) % self.capacity for i, (k, v) in enumerate(self.table[index]): if k == key: self.table[index][i] = (key, value) # Update existing key return
self.table[index].append((key, value)) # Add new key-value pair self.size += 1
def get(self, key): index = self._hash(key) % self.capacity for k, v in self.table[index]: if k == key: return v return None # Key not found
def delete(self, key): index = self._hash(key) % self.capacity for i, (k, v) in enumerate(self.table[index]): if k == key: del self.table[index][i] self.size -= 1 return # Key not found, no deletion
def _hash(self, key): return hash(key) # Built-in Python hash function
def print_table(self): for i in range(self.capacity): print(f"Bucket {i}: {self.table[i]}")
# Example Usageht = HashTable()ht.insert("apple", 1)ht.insert("banana", 2)ht.insert("cherry", 3)
print(ht.get("banana")) # Output: 2ht.delete("banana")print(ht.get("banana")) # Output: Noneht.print_table()Java
import java.util.LinkedList;import java.util.List;
class HashTable<K, V> { private static final int DEFAULT_CAPACITY = 10; private List<Entry<K, V>>[] table; private int size;
public HashTable(int capacity) { table = new LinkedList[capacity]; for (int i = 0; i < capacity; i++) { table[i] = new LinkedList<>(); } }
public HashTable() { this(DEFAULT_CAPACITY); }
public int size() { return size; }
public boolean isEmpty() { return size == 0; }
public void put(K key, V value) { int index = hash(key) % table.length; List<Entry<K, V>> bucket = table[index];
for (Entry<K, V> entry : bucket) { if (entry.key.equals(key)) { entry.value = value; // Update existing key return; } }
bucket.add(new Entry<>(key, value)); size++; }
public V get(K key) { int index = hash(key) % table.length; List<Entry<K, V>> bucket = table[index];
for (Entry<K, V> entry : bucket) { if (entry.key.equals(key)) { return entry.value; } }
return null; // Key not found }
public void remove(K key) { int index = hash(key) % table.length; List<Entry<K, V>> bucket = table[index];
bucket.removeIf(entry -> entry.key.equals(key)); size--; }
private int hash(K key) { return Math.abs(key.hashCode()); // Ensure positive hash code }
private static class Entry<K, V> { K key; V value;
Entry(K key, V value) { this.key = key; this.value = value; } }
public void printTable() { for (int i = 0; i < table.length; i++) { System.out.print("Bucket " + i + ": "); if (table[i] != null) { for (Entry<K, V> entry : table[i]) { System.out.print("(" + entry.key + ", " + entry.value + ") "); } } System.out.println(); } }
public static void main(String[] args) { HashTable<String, Integer> ht = new HashTable<>(); ht.put("apple", 1); ht.put("banana", 2); ht.put("cherry", 3);
System.out.println(ht.get("banana")); // Output: 2 ht.remove("banana"); System.out.println(ht.get("banana")); // Output: null ht.printTable(); }
}C++
#include <iostream>#include <vector>#include <list>
template <typename K, typename V>class HashTable {private: size_t capacity; std::vector<std::list<std::pair<K, V>>> table; size_t size;
size_t hash(const K& key) const { // Simple hash function (can be improved) std::hash<K> hasher; return hasher(key) % capacity; }
public: HashTable(size_t capacity = 10) : capacity(capacity), table(capacity), size(0) {}
size_t getSize() const { return size; }
bool isEmpty() const { return size == 0; }
void insert(const K& key, const V& value) { size_t index = hash(key); for (auto& pair : table[index]) { if (pair.first == key) { pair.second = value; // Update existing key return; } } table[index].push_back({key, value}); size++; }
V get(const K& key) const { size_t index = hash(key); for (const auto& pair : table[index]) { if (pair.first == key) { return pair.second; } } return V(); // Key not found (default value for V) }
void remove(const K& key) { size_t index = hash(key); table[index].remove_if([&](const std::pair<K, V>& pair) { return pair.first == key; }); size--; }
void printTable() const { for (size_t i = 0; i < capacity; ++i) { std::cout << "Bucket " << i << ": "; for (const auto& pair : table[i]) { std::cout << "(" << pair.first << ", " << pair.second << ") "; } std::cout << std::endl; } }};
int main() { HashTable<std::string, int> ht; ht.insert("apple", 1); ht.insert("banana", 2); ht.insert("cherry", 3);
std::cout << ht.get("banana") << std::endl; // Output: 2 ht.remove("banana"); std::cout << ht.get("banana") << std::endl; // Output: 0 (default int value) ht.printTable();
return 0;}Explanation:
capacity: The initial size of the array. Choosing a good capacity is crucial for performance. Prime numbers often work well.table: The underlying array that stores the key-value pairs.size: The number of key-value pairs currently stored in the hash table.hash(key): A function that takes a key and returns an integer representing the index in the table. A good hash function distributes keys evenly. The modulo operator (% capacity) ensures the index is within the bounds of the array.- Collision Handling (Separate Chaining): Each index in the
tableis a linked list (or vector). When two keys hash to the same index, they are both added to the linked list at that index. - Insertion: Calculate the index, search the linked list for the key. If the key exists, update the value. Otherwise, add a new key-value pair to the linked list.
- Lookup: Calculate the index, search the linked list for the key, and return the corresponding value.
- Deletion: Calculate the index, search the linked list for the key, and remove the key-value pair.
4. Common Patterns
Section titled “4. Common Patterns”- Frequency Counting: Use a hash table to count the occurrences of elements in an array or string.
- Caching: Store frequently accessed data in a hash table for quick retrieval.
- Symbol Tables: Compilers use hash tables to store information about variables and functions.
- Deduplication: Use a hash table to check if an element already exists before adding it to a collection.
- Two-Sum/K-Sum Problems: Hash tables are used to efficiently find pairs or groups of numbers that add up to a target value.
Variations:
- Open Addressing: Instead of linked lists, collisions are resolved by probing for an empty slot in the array. Common probing techniques include linear probing, quadratic probing, and double hashing. Open addressing can be more space-efficient than separate chaining, but it can suffer from clustering.
- Cuckoo Hashing: Uses multiple hash functions and moves elements around to resolve collisions. Can provide good performance but is more complex to implement.
- Dynamic Resizing: When the hash table becomes too full (load factor exceeds a threshold), resize the table to maintain good performance. This involves rehashing all the existing elements.
5. Pro Tips
Section titled “5. Pro Tips”- Choose a Good Hash Function: A good hash function is essential for even distribution of keys and minimizing collisions. Consider using well-established hash functions like MurmurHash or CityHash for better performance. Avoid simple modulo-based hash functions, especially when the keys have patterns.
- Load Factor: The load factor is the ratio of the number of elements to the capacity of the hash table (
size / capacity). Keep the load factor below a certain threshold (e.g., 0.75) to maintain good performance. When the load factor exceeds the threshold, resize the table. - Resizing: When resizing, double the capacity (or multiply by a factor greater than 1). Rehashing all the elements is an O(n) operation, so resizing should be done judiciously.
- Separate Chaining vs. Open Addressing: Separate chaining is generally simpler to implement and handles collisions more gracefully. Open addressing can be more space-efficient, but it is more susceptible to clustering.
- Consider Using Built-in Hash Table Implementations: Most programming languages provide optimized hash table implementations (e.g.,
dictin Python,HashMapin Java,unordered_mapin C++). Use these when possible, as they are likely to be more efficient than custom implementations. However, understand how they work internally. - Prime Number Capacity: Using a prime number for the initial capacity of the hash table can help to distribute keys more evenly, especially if the hash function is not perfect.
- Avoid Mutable Keys: If you are using mutable objects as keys, be careful not to modify them after they have been inserted into the hash table. Changing the key will change its hash code, and the hash table will no longer be able to find the key.
Common Mistakes to Avoid:
- Using a Poor Hash Function: This leads to excessive collisions and degrades performance.
- Ignoring Load Factor: Allowing the load factor to become too high leads to poor performance.
- Forgetting to Resize: Not resizing the hash table when it becomes too full leads to poor performance.
- Using Mutable Keys: This can lead to data corruption and unexpected behavior.
- Assuming O(1) in all cases: Remember that the worst-case time complexity can be O(n) due to collisions.
6. Practice Problems
Section titled “6. Practice Problems”-
Two Sum (LeetCode #1): Given an array of integers
numsand an integertarget, return indices of the two numbers such that they add up totarget.- Solution: Use a hash table to store the numbers and their indices. Iterate through the array, and for each number, check if
target - numexists in the hash table.
- Solution: Use a hash table to store the numbers and their indices. Iterate through the array, and for each number, check if
-
First Unique Character in a String (LeetCode #387): Given a string
s, find the first non-repeating character in it and return its index. If it does not exist, return-1.- Solution: Use a hash table to count the frequency of each character in the string. Then, iterate through the string and return the index of the first character with a frequency of 1.
-
Group Anagrams (LeetCode #49): Given an array of strings
strs, group the anagrams together. You can return the answer in any order.- Solution: Use a hash table where the key is a sorted version of the string (representing its anagram group) and the value is a list of strings that are anagrams of each other.
7. Templates
Section titled “7. Templates”These are basic templates to get you started. Remember to adapt them to the specific problem requirements.
C++
#include <iostream>#include <unordered_map>#include <vector>
using namespace std;
// Template for frequency countingunordered_map<int, int> frequencyCounter(const vector<int>& nums) { unordered_map<int, int> counts; for (int num : nums) { counts[num]++; } return counts;}
//Template for two sumvector<int> twoSum(vector<int>& nums, int target) { unordered_map<int, int> numMap; for (int i = 0; i < nums.size(); ++i) { int complement = target - nums[i]; if (numMap.count(complement)) { return {numMap[complement], i}; } numMap[nums[i]] = i; } return {}; // No solution found}
int main() { // Example Usage: Frequency Counting vector<int> data = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4}; unordered_map<int, int> freq = frequencyCounter(data); cout << "Frequency Counts:" << endl; for (const auto& pair : freq) { cout << pair.first << ": " << pair.second << endl; }
return 0;}Python
from collections import defaultdict
# Template for frequency countingdef frequency_counter(nums): counts = {} for num in nums: counts[num] = counts.get(num, 0) + 1 return counts
# Template for groupingdef group_by(items, key_func): """Groups items into a dictionary based on a key function.""" grouped = defaultdict(list) for item in items: key = key_func(item) grouped[key].append(item) return grouped
def twoSum(nums, target): numMap = {} for i, num in enumerate(nums): complement = target - num if complement in numMap: return [numMap[complement], i] numMap[num] = i return [] # No solution found
# Example Usage: Frequency Countingdata = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]freq = frequency_counter(data)print("Frequency Counts:")for num, count in freq.items(): print(f"{num}: {count}")Java
import java.util.HashMap;import java.util.List;import java.util.ArrayList;import java.util.Arrays;import java.util.Map;
class HashTableTemplates {
// Template for frequency counting public static Map<Integer, Integer> frequencyCounter(int[] nums) { Map<Integer, Integer> counts = new HashMap<>(); for (int num : nums) { counts.put(num, counts.getOrDefault(num, 0) + 1); } return counts; }
public static int[] twoSum(int[] nums, int target) { Map<Integer, Integer> numMap = new HashMap<>(); for (int i = 0; i < nums.length; ++i) { int complement = target - nums[i]; if (numMap.containsKey(complement)) { return new int[] {numMap.get(complement), i}; } numMap.put(nums[i], i); } return new int[] {}; // No solution found }
public static void main(String[] args) { // Example Usage: Frequency Counting int[] data = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4}; Map<Integer, Integer> freq = frequencyCounter(data); System.out.println("Frequency Counts:"); for (Map.Entry<Integer, Integer> entry : freq.entrySet()) { System.out.println(entry.getKey() + ": " + entry.getValue()); } }}This cheatsheet provides a strong foundation for understanding and implementing hash tables in various programming languages. Remember to adapt and customize these concepts to the specific requirements of your problem. Good luck!