Hash Table Implementation

Generated on 2025-07-10 02:09:15 Algorithm Cheatsheet for Technical Interviews

Hash Table Implementation Cheatsheet

1. Quick Overview

What is it?

A hash table (also known as a hash map) is a data structure that implements an associative array abstract data type, which can map keys to values. It uses a hash function to compute an index into an array of buckets or slots, from which the desired value can be found.

When to Use it?

When you need fast average-case lookup, insertion, and deletion operations (O(1) on average).
When you need to implement associative arrays or dictionaries.
When the order of elements doesn’t matter.
For caching, symbol tables, and database indexing.

2. Time & Space Complexity

Operation	Average Case	Worst Case
Insertion	O(1)	O(n)
Deletion	O(1)	O(n)
Lookup	O(1)	O(n)
Space	O(n)	O(n)

Worst Case: Occurs when all keys hash to the same index (collisions). This degrades performance to O(n) for lookup, insertion, and deletion.
Average Case: Assumes a good hash function that distributes keys evenly across the table.

3. Implementation

Here are implementations in Python, Java, and C++, demonstrating separate chaining (collision resolution):

Python

class HashTable:
    def __init__(self, capacity=10):
        self.capacity = capacity
        self.table = [[] for _ in range(capacity)]  # List of lists (separate chaining)
        self.size = 0

    def __len__(self):
        return self.size

    def __contains__(self, key):
        index = self._hash(key) % self.capacity
        for k, _ in self.table[index]:  # Iterate through the chain
            if k == key:
                return True
        return False

    def insert(self, key, value):
        index = self._hash(key) % self.capacity
        for i, (k, v) in enumerate(self.table[index]):
            if k == key:
                self.table[index][i] = (key, value) # Update existing key
                return

        self.table[index].append((key, value))  # Add new key-value pair
        self.size += 1

    def get(self, key):
        index = self._hash(key) % self.capacity
        for k, v in self.table[index]:
            if k == key:
                return v
        return None  # Key not found

    def delete(self, key):
        index = self._hash(key) % self.capacity
        for i, (k, v) in enumerate(self.table[index]):
            if k == key:
                del self.table[index][i]
                self.size -= 1
                return
        # Key not found, no deletion

    def _hash(self, key):
        return hash(key) # Built-in Python hash function

    def print_table(self):
        for i in range(self.capacity):
            print(f"Bucket {i}: {self.table[i]}")


# Example Usage
ht = HashTable()
ht.insert("apple", 1)
ht.insert("banana", 2)
ht.insert("cherry", 3)

print(ht.get("banana"))  # Output: 2
ht.delete("banana")
print(ht.get("banana"))  # Output: None
ht.print_table()

Java

import java.util.LinkedList;
import java.util.List;

class HashTable<K, V> {
    private static final int DEFAULT_CAPACITY = 10;
    private List<Entry<K, V>>[] table;
    private int size;

    public HashTable(int capacity) {
        table = new LinkedList[capacity];
        for (int i = 0; i < capacity; i++) {
            table[i] = new LinkedList<>();
        }
    }

    public HashTable() {
        this(DEFAULT_CAPACITY);
    }

    public int size() {
        return size;
    }

    public boolean isEmpty() {
        return size == 0;
    }

    public void put(K key, V value) {
        int index = hash(key) % table.length;
        List<Entry<K, V>> bucket = table[index];

        for (Entry<K, V> entry : bucket) {
            if (entry.key.equals(key)) {
                entry.value = value; // Update existing key
                return;
            }
        }

        bucket.add(new Entry<>(key, value));
        size++;
    }

    public V get(K key) {
        int index = hash(key) % table.length;
        List<Entry<K, V>> bucket = table[index];

        for (Entry<K, V> entry : bucket) {
            if (entry.key.equals(key)) {
                return entry.value;
            }
        }

        return null; // Key not found
    }

    public void remove(K key) {
        int index = hash(key) % table.length;
        List<Entry<K, V>> bucket = table[index];

        bucket.removeIf(entry -> entry.key.equals(key));
        size--;
    }

    private int hash(K key) {
        return Math.abs(key.hashCode()); // Ensure positive hash code
    }

    private static class Entry<K, V> {
        K key;
        V value;

        Entry(K key, V value) {
            this.key = key;
            this.value = value;
        }
    }

    public void printTable() {
        for (int i = 0; i < table.length; i++) {
            System.out.print("Bucket " + i + ": ");
            if (table[i] != null) {
                for (Entry<K, V> entry : table[i]) {
                    System.out.print("(" + entry.key + ", " + entry.value + ") ");
                }
            }
            System.out.println();
        }
    }

    public static void main(String[] args) {
        HashTable<String, Integer> ht = new HashTable<>();
        ht.put("apple", 1);
        ht.put("banana", 2);
        ht.put("cherry", 3);

        System.out.println(ht.get("banana")); // Output: 2
        ht.remove("banana");
        System.out.println(ht.get("banana")); // Output: null
        ht.printTable();
    }

}

C++

#include <iostream>
#include <vector>
#include <list>

template <typename K, typename V>
class HashTable {
private:
    size_t capacity;
    std::vector<std::list<std::pair<K, V>>> table;
    size_t size;

    size_t hash(const K& key) const {
        // Simple hash function (can be improved)
        std::hash<K> hasher;
        return hasher(key) % capacity;
    }

public:
    HashTable(size_t capacity = 10) : capacity(capacity), table(capacity), size(0) {}

    size_t getSize() const {
        return size;
    }

    bool isEmpty() const {
        return size == 0;
    }

    void insert(const K& key, const V& value) {
        size_t index = hash(key);
        for (auto& pair : table[index]) {
            if (pair.first == key) {
                pair.second = value; // Update existing key
                return;
            }
        }
        table[index].push_back({key, value});
        size++;
    }

    V get(const K& key) const {
        size_t index = hash(key);
        for (const auto& pair : table[index]) {
            if (pair.first == key) {
                return pair.second;
            }
        }
        return V(); // Key not found (default value for V)
    }

    void remove(const K& key) {
        size_t index = hash(key);
        table[index].remove_if([&](const std::pair<K, V>& pair) {
            return pair.first == key;
        });
        size--;
    }

    void printTable() const {
        for (size_t i = 0; i < capacity; ++i) {
            std::cout << "Bucket " << i << ": ";
            for (const auto& pair : table[i]) {
                std::cout << "(" << pair.first << ", " << pair.second << ") ";
            }
            std::cout << std::endl;
        }
    }
};

int main() {
    HashTable<std::string, int> ht;
    ht.insert("apple", 1);
    ht.insert("banana", 2);
    ht.insert("cherry", 3);

    std::cout << ht.get("banana") << std::endl; // Output: 2
    ht.remove("banana");
    std::cout << ht.get("banana") << std::endl; // Output: 0 (default int value)
    ht.printTable();

    return 0;
}

Explanation:

capacity: The initial size of the array. Choosing a good capacity is crucial for performance. Prime numbers often work well.
table: The underlying array that stores the key-value pairs.
size: The number of key-value pairs currently stored in the hash table.
hash(key): A function that takes a key and returns an integer representing the index in the table. A good hash function distributes keys evenly. The modulo operator (% capacity) ensures the index is within the bounds of the array.
Collision Handling (Separate Chaining): Each index in the table is a linked list (or vector). When two keys hash to the same index, they are both added to the linked list at that index.
Insertion: Calculate the index, search the linked list for the key. If the key exists, update the value. Otherwise, add a new key-value pair to the linked list.
Lookup: Calculate the index, search the linked list for the key, and return the corresponding value.
Deletion: Calculate the index, search the linked list for the key, and remove the key-value pair.

4. Common Patterns

Frequency Counting: Use a hash table to count the occurrences of elements in an array or string.
Caching: Store frequently accessed data in a hash table for quick retrieval.
Symbol Tables: Compilers use hash tables to store information about variables and functions.
Deduplication: Use a hash table to check if an element already exists before adding it to a collection.
Two-Sum/K-Sum Problems: Hash tables are used to efficiently find pairs or groups of numbers that add up to a target value.

Variations:

Open Addressing: Instead of linked lists, collisions are resolved by probing for an empty slot in the array. Common probing techniques include linear probing, quadratic probing, and double hashing. Open addressing can be more space-efficient than separate chaining, but it can suffer from clustering.
Cuckoo Hashing: Uses multiple hash functions and moves elements around to resolve collisions. Can provide good performance but is more complex to implement.
Dynamic Resizing: When the hash table becomes too full (load factor exceeds a threshold), resize the table to maintain good performance. This involves rehashing all the existing elements.

5. Pro Tips

Choose a Good Hash Function: A good hash function is essential for even distribution of keys and minimizing collisions. Consider using well-established hash functions like MurmurHash or CityHash for better performance. Avoid simple modulo-based hash functions, especially when the keys have patterns.
Load Factor: The load factor is the ratio of the number of elements to the capacity of the hash table ( size / capacity). Keep the load factor below a certain threshold (e.g., 0.75) to maintain good performance. When the load factor exceeds the threshold, resize the table.
Resizing: When resizing, double the capacity (or multiply by a factor greater than 1). Rehashing all the elements is an O(n) operation, so resizing should be done judiciously.
Separate Chaining vs. Open Addressing: Separate chaining is generally simpler to implement and handles collisions more gracefully. Open addressing can be more space-efficient, but it is more susceptible to clustering.
Consider Using Built-in Hash Table Implementations: Most programming languages provide optimized hash table implementations (e.g., dict in Python, HashMap in Java, unordered_map in C++). Use these when possible, as they are likely to be more efficient than custom implementations. However, understand how they work internally.
Prime Number Capacity: Using a prime number for the initial capacity of the hash table can help to distribute keys more evenly, especially if the hash function is not perfect.
Avoid Mutable Keys: If you are using mutable objects as keys, be careful not to modify them after they have been inserted into the hash table. Changing the key will change its hash code, and the hash table will no longer be able to find the key.

Common Mistakes to Avoid:

Using a Poor Hash Function: This leads to excessive collisions and degrades performance.
Ignoring Load Factor: Allowing the load factor to become too high leads to poor performance.
Forgetting to Resize: Not resizing the hash table when it becomes too full leads to poor performance.
Using Mutable Keys: This can lead to data corruption and unexpected behavior.
Assuming O(1) in all cases: Remember that the worst-case time complexity can be O(n) due to collisions.

6. Practice Problems

Two Sum (LeetCode #1): Given an array of integers nums and an integer target, return indices of the two numbers such that they add up to target.
- Solution: Use a hash table to store the numbers and their indices. Iterate through the array, and for each number, check if target - num exists in the hash table.
First Unique Character in a String (LeetCode #387): Given a string s, find the first non-repeating character in it and return its index. If it does not exist, return -1.
- Solution: Use a hash table to count the frequency of each character in the string. Then, iterate through the string and return the index of the first character with a frequency of 1.
Group Anagrams (LeetCode #49): Given an array of strings strs, group the anagrams together. You can return the answer in any order.
- Solution: Use a hash table where the key is a sorted version of the string (representing its anagram group) and the value is a list of strings that are anagrams of each other.

7. Templates

These are basic templates to get you started. Remember to adapt them to the specific problem requirements.

C++

#include <iostream>
#include <unordered_map>
#include <vector>

using namespace std;

// Template for frequency counting
unordered_map<int, int> frequencyCounter(const vector<int>& nums) {
    unordered_map<int, int> counts;
    for (int num : nums) {
        counts[num]++;
    }
    return counts;
}

//Template for two sum
vector<int> twoSum(vector<int>& nums, int target) {
    unordered_map<int, int> numMap;
    for (int i = 0; i < nums.size(); ++i) {
        int complement = target - nums[i];
        if (numMap.count(complement)) {
            return {numMap[complement], i};
        }
        numMap[nums[i]] = i;
    }
    return {}; // No solution found
}

int main() {
    // Example Usage: Frequency Counting
    vector<int> data = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4};
    unordered_map<int, int> freq = frequencyCounter(data);
    cout << "Frequency Counts:" << endl;
    for (const auto& pair : freq) {
        cout << pair.first << ": " << pair.second << endl;
    }

    return 0;
}

Python

from collections import defaultdict

# Template for frequency counting
def frequency_counter(nums):
    counts = {}
    for num in nums:
        counts[num] = counts.get(num, 0) + 1
    return counts

# Template for grouping
def group_by(items, key_func):
  """Groups items into a dictionary based on a key function."""
  grouped = defaultdict(list)
  for item in items:
    key = key_func(item)
    grouped[key].append(item)
  return grouped

def twoSum(nums, target):
    numMap = {}
    for i, num in enumerate(nums):
        complement = target - num
        if complement in numMap:
            return [numMap[complement], i]
        numMap[num] = i
    return []  # No solution found

# Example Usage: Frequency Counting
data = [1, 2, 2, 3, 3, 3, 4, 4, 4, 4]
freq = frequency_counter(data)
print("Frequency Counts:")
for num, count in freq.items():
    print(f"{num}: {count}")

Java

import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Map;

class HashTableTemplates {

    // Template for frequency counting
    public static Map<Integer, Integer> frequencyCounter(int[] nums) {
        Map<Integer, Integer> counts = new HashMap<>();
        for (int num : nums) {
            counts.put(num, counts.getOrDefault(num, 0) + 1);
        }
        return counts;
    }

    public static int[] twoSum(int[] nums, int target) {
        Map<Integer, Integer> numMap = new HashMap<>();
        for (int i = 0; i < nums.length; ++i) {
            int complement = target - nums[i];
            if (numMap.containsKey(complement)) {
                return new int[] {numMap.get(complement), i};
            }
            numMap.put(nums[i], i);
        }
        return new int[] {}; // No solution found
    }

    public static void main(String[] args) {
        // Example Usage: Frequency Counting
        int[] data = {1, 2, 2, 3, 3, 3, 4, 4, 4, 4};
        Map<Integer, Integer> freq = frequencyCounter(data);
        System.out.println("Frequency Counts:");
        for (Map.Entry<Integer, Integer> entry : freq.entrySet()) {
            System.out.println(entry.getKey() + ": " + entry.getValue());
        }
    }
}

This cheatsheet provides a strong foundation for understanding and implementing hash tables in various programming languages. Remember to adapt and customize these concepts to the specific requirements of your problem. Good luck!