Intro
A Hash Function takes data (like a string, or a file’s contents) and outputs a hash, a fixed-size string or number
Hash Functions
Collision Resolution
A Hash Table / Map is a Data Structure that implements an associative array, mapping keys to values using a hash function
Key components:
A Dictionary in Python is a Hash Table / Map that has unordered, unique and immutable keys (such as strings, numbers, or tuples) and values that can be any Python object
# Creating a dictionary (key-value pairs)
d = {"a": 1, "b": 2, "c": 3}
# Insert / Update value
d["d"] = 4 # O(1) average
d["a"] = 10 # O(1) average (overwrite)
# Access value
value = d["b"] # O(1) average
# Safe access (no KeyError)
value = d.get("x", 0) # O(1)
# Check if key exists
if "a" in d: # O(1)
print("Key exists")
# Remove a key
del d["c"] # O(1)
# Remove and return value
removed = d.pop("b") # O(1)
# Remove last inserted item
key, val = d.popitem() # O(1)
# Dictionary size
size = len(d) # O(1)
# Clear dictionary
d.clear() # O(1)
# -------------------------------------
# Iteration
for key in d.keys(): # O(n)
print(key)
for value in d.values(): # O(n)
print(value)
for key, value in d.items(): # O(n)
print(key, value)
# -------------------------------------
# Dictionary merging (Python 3.9+)
d1 = {"a": 1, "b": 2}
d2 = {"b": 3, "c": 4}
merged = d1 | d2 # O(len(d1) + len(d2))
# If duplicate keys, right-hand dict wins
# -------------------------------------
# Practical use case: frequency counting
nums = [1, 2, 2, 3, 3, 3]
freq = {}
for n in nums: # O(n)
freq[n] = freq.get(n, 0) + 1
# freq → {1: 1, 2: 2, 3: 3}
A Hash Set is like a Hash Table, except it only stores keys, without values
Key components:
A Set in Python is a Hash Set that is unordered, has unique elements and is hash-based
# Creating a set (unordered, unique elements)
s = {1, 2, 3}
# Add an element
s.add(4) # O(1) average
# Adds 4 if it doesn't already exist
# Remove an element (raises error if missing)
s.remove(2) # O(1) average
# Discard an element (no error if missing)
s.discard(10) # O(1) average
# Membership check
if 3 in s: # O(1) average
print("3 exists in set")
# Length of set
size = len(s) # O(1)
# Copying a set
s_copy = s.copy() # O(n)
# Clear all elements
s.clear() # O(1)
# -------------------------------------
# Set algebra operations
a = {1, 2, 3}
b = {3, 4, 5}
# Union: all unique elements
union = a | b # or a.union(b) → O(len(a) + len(b))
# Intersection: common elements
intersection = a & b # or a.intersection(b) → O(min(len(a), len(b)))
# Difference: elements in a not in b
difference = a - b # or a.difference(b) → O(len(a))
# Symmetric difference: in either, not both
sym_diff = a ^ b # or a.symmetric_difference(b) → O(len(a) + len(b))
# -------------------------------------
# Subset / Superset checks
is_subset = a <= b # O(len(a))
is_superset = a >= b # O(len(b))
# -------------------------------------
# Practical use case: detect duplicates in a list
nums = [1, 2, 3, 3, 4]
has_duplicates = len(nums) != len(set(nums)) # O(n)
Usage
Hash Table
Hash Sets
Comparison
Advantages
Hash Table
Hash Set
Disadvantages
Hash Table / Set
Patterns
Algorithms
Count Sort → https://www.visuallearner.org/algorithms/sorting/count-sort
Bucket Sort → https://www.visuallearner.org/algorithms/sorting/bucket-sort
Problems
Corner Cases
Implementation
| Time Complexity | ||
|---|---|---|
| Operations | Worst case | Average case |
| insert | O(n) |
O(1) |
| search | O(n) |
O(1) |
| remove | O(n) |
O(1) |
| --- | --- |
n is the number of elements in the hash table
Chaining
class Node:
def __init__(self, key):
self.key = key
self.next = None
class ChainingHashTable:
def __init__(self, size=11):
self.size = size
self.table = [None] * size
self.count = 0
def _hash(self, key):
return key % self.size
def insert(self, key):
index = self._hash(key)
# Check if key already exists
current = self.table[index]
while current:
if current.key == key:
return
current = current.next
# Insert new node at beginning of chain
new_node = Node(key)
new_node.next = self.table[index]
self.table[index] = new_node
self.count += 1
# Resize if load factor > 0.75
if self.count > self.size * 0.75:
self._resize()
def search(self, key):
index = self._hash(key)
current = self.table[index]
while current:
if current.key == key:
return True
current = current.next
return False
def delete(self, key):
index = self._hash(key)
current = self.table[index]
prev = None
while current:
if current.key == key:
if prev:
prev.next = current.next
else:
self.table[index] = current.next
self.count -= 1
return True
prev = current
current = current.next
return False
def _resize(self):
old_table = self.table
self.size = self._next_prime(self.size * 2)
self.table = [None] * self.size
self.count = 0
for head in old_table:
current = head
while current:
self.insert(current.key)
current = current.next
def display(self):
for i in range(self.size):
print(f"[{i}]: ", end="")
current = self.table[i]
while current:
print(f"{current.key} -> ", end="")
current = current.next
print("NULL")
Linear Probing
Linear Probing is a collision resolution technique for hash tables that uses consecutive slots to resolve collisions
class LinearProbingHashTable:
def __init__(self, size=11):
self.size = size
self.table = [None] * size
self.deleted = [False] * size
self.count = 0
def _hash(self, key):
return key % self.size
def insert(self, key):
if self.count >= self.size * 0.7:
self._resize()
index = self._hash(key)
# Linear probing
while (self.table[index] is not None and
self.table[index] != key and
not self.deleted[index]):
index = (index + 1) % self.size
if self.table[index] != key:
self.table[index] = key
self.deleted[index] = False
self.count += 1
return True
def search(self, key):
index = self._hash(key)
probes = 0
while (self.table[index] is not None or self.deleted[index]):
if (self.table[index] == key and not self.deleted[index]):
return index
index = (index + 1) % self.size
probes += 1
if probes >= self.size:
break
return -1
def delete(self, key):
index = self.search(key)
if index != -1:
self.table[index] = None
self.deleted[index] = True
self.count -= 1
return True
return False
def _resize(self):
old_table = self.table[:]
old_deleted = self.deleted[:]
self.size = self._next_prime(self.size * 2)
self.table = [None] * self.size
self.deleted = [False] * self.size
self.count = 0
for i, item in enumerate(old_table):
if item is not None and not old_deleted[i]:
self.insert(item)
def _is_prime(self, n):
if n < 2:
return False
for i in range(2, int(n**0.5) + 1):
if n % i == 0:
return False
return True
def _next_prime(self, n):
while not self._is_prime(n):
n += 1
return n
def display(self):
for i in range(self.size):
if self.table[i] is not None and not self.deleted[i]:
print(f"[{i}]: {self.table[i]}")
elif self.deleted[i]:
print(f"[{i}]: DELETED")
else:
print(f"[{i}]: EMPTY")
Quadratic Probing
Quadratic Probing is a collision resolution technique for hash tables that uses a quadratic function to find the next available slot when a collision occurs
class QuadraticProbingHashTable:
def __init__(self, size=11):
self.size = size
self.table = [None] * size
self.deleted = [False] * size
self.count = 0
def _hash(self, key):
return key % self.size
def insert(self, key):
if self.count >= self.size * 0.7:
self._resize()
index = self._hash(key)
i = 0
while (self.table[index] is not None and
self.table[index] != key and
not self.deleted[index]):
i += 1
index = (self._hash(key) + i * i) % self.size
if i >= self.size:
return False
if self.table[index] != key:
self.table[index] = key
self.deleted[index] = False
self.count += 1
return True
def search(self, key):
index = self._hash(key)
i = 0
while (self.table[index] is not None or self.deleted[index]):
if (self.table[index] == key and not self.deleted[index]):
return index
i += 1
index = (self._hash(key) + i * i) % self.size
if i >= self.size:
break
return -1
def delete(self, key):
index = self.search(key)
if index != -1:
self.table[index] = None
self.deleted[index] = True
self.count -= 1
return True
return False
def _resize(self):
old_table = self.table[:]
old_deleted = self.deleted[:]
self.size = self._next_prime(self.size * 2)
self.table = [None] * self.size
self.deleted = [False] * self.size
self.count = 0
for i, item in enumerate(old_table):
if item is not None and not old_deleted[i]:
self.insert(item)
def _is_prime(self, n):
if n < 2:
return False
for i in range(2, int(n**0.5) + 1):
if n % i == 0:
return False
return True
def _next_prime(self, n):
while not self._is_prime(n):
n += 1
return n
Resources
Taking Hash Tables Off The Shelf
Visual Learner - Interactive Algorithm Visualizations
Visual Learner - Interactive Algorithm Visualizations
Visual Learner - Interactive Algorithm Visualizations
Visual Learner - Interactive Algorithm Visualizations
Here are some takeaways about how to design the key for you.
sorted string/array as the key.offset as the key.TreeNode as key sometimes. But in most cases, the serialization of the subtree might be a better idea.the row index or the column index as key.block this element belongs to.diagonal line.