| Balanced tree | Hash table | |||
|---|---|---|---|---|
| Operations | (worst) | (avg.) | (worst) | |
| Sorting-unrelated operations | Insert | $O(\log n)$ | $O(1)$ | $O(n)$ |
| Delete | $O(\log n)$ | $O(1)$ | $O(n)$ | |
| Search | $O(\log n)$ | $O(1)$ | $O(n)$ | |
| Sorting-related operations | Sort | $O(n)$ | ✘ | |
| Minimum | $O(\log n)$ | ✘ | ||
| Maximum | $O(\log n)$ | ✘ | ||
| Predecessor | $O(\log n)$ | ✘ | ||
| Successor | $O(\log n)$ | ✘ | ||
| Range-Minimum | $O(\log n)$ | ✘ | ||
| Range-Maximum | $O(\log n)$ | ✘ | ||
| Range-Sum | $O(n)$ | ✘ | ||
A hash function is a function that maps arbitrary size data to fixed size data.
Polynomial hash codes.
Hashcode$(x_0, x_1, \ldots, x_{n-1}) = x_0a^{n-1} + x_1a^{n-2} + \cdots + x_{n-2}a + x_{n-1}$ (polynomial)
\begin{algorithm}
\caption{Stage 1 of hash function}
\begin{algorithmic}
\REQUIRE $key$: a string of characters (of any object) that needs to be hashed
\ENSURE $hashcode$: the hash code of the string $key$
\FUNCTION{HashCode}{$key$}
\STATE $hashcode \gets 0$
\FOR{$i \gets 0$ \TO $|key| - 1$}
\STATE $hashcode \gets hashcode \times 31 + \text{CharacterCode}(key[i])$
\ENDFOR
\RETURN $hashcode$
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}
A good compression function minimizes the number of collisions for a given set of distinct hash codes.
Division method.
Compression($i$) $= i \bmod N$ (remainder)
$N \ge 1$ is the size of the bucket array.
Choose $N$ as prime for better distribution of keys.
\begin{algorithm}
\caption{Stage 2 of hash function}
\begin{algorithmic}
\REQUIRE $hashcode$: hashcode obtained from stage 1; $tablesize$: the size of the hash table
\ENSURE $hash$: an index between $0$ and $(tablesize-1)$ of the hash table
\FUNCTION{Hash}{$hashcode, tablesize$}
\STATE $hash \gets hashcode \bmod tablesize$
\RETURN $hash$
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}
There are two major collision-handling schemes or collision-resolution strategies.
| Collision-handling scheme | Features |
|---|---|
| Separate chaining | Extra space (for secondary data structures) Simpler implementation |
| Open addressing | No extra space More complicated implementation |
\begin{algorithm}
\begin{algorithmic}
\STATE \textbf{class} \textsc{HashTable-SeparateChaining-SLL}$()$
\STATE $N$ (hash table size)
\STATE Create a $bucket[0 \ldots (N-1)]$ array, where each $bucket[i]$ points to a SinglyLinkedList.
\STATE
\STATE Methods:
\STATE \textsc{HashCode}$(key)$
\STATE \textsc{Hash}$(key)$
\STATE \textsc{Print}$()$
\STATE \textsc{Get}$(key)$
\STATE \textsc{Put}$(\langle key, value \rangle)$
\STATE \textsc{Remove}$(key)$
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\begin{algorithmic}
\FUNCTION{Print}{}
\FOR{$i \gets 0$ \TO $N - 1$}
\STATE \textbf{print} ``Bucket $i$:''
\STATE \textbf{print} entire SLL contents of $bucket[i]$
\ENDFOR
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\begin{algorithmic}
\FUNCTION{Get}{$key$}
\STATE $index \gets \text{Hash}(key)$
\IF{$key$ exists in $bucket[index]$ SLL at node $x$}
\RETURN $x.value$
\ELSE
\RETURN $null$
\ENDIF
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\begin{algorithmic}
\FUNCTION{Put}{$\langle key, value \rangle$}
\STATE $index \gets \text{Hash}(key)$
\IF{$key$ exists in $bucket[index]$ SLL at node $x$}
\STATE Update $x.value$ with $value$
\ELSE
\STATE $bucket[index].\text{AddLast}(\langle key, value \rangle)$
\ENDIF
\IF{load factor is high}
\STATE Resize the hash table
\ENDIF
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\begin{algorithmic}
\FUNCTION{Remove}{$key$}
\STATE $index \gets \text{Hash}(key)$
\IF{$key$ exists in $bucket[index]$ SLL at node $x$}
\STATE Remove node $x$
\ENDIF
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}
| Scheme | Function |
|---|---|
| Linear probing | $f(i) = i$ |
| Quadratic probing | $f(i) = i^2$ |
\begin{algorithm}
\begin{algorithmic}
\STATE \textbf{class} \textsc{HashTable-OpenAddressing-LinearProbing}$()$
\STATE $capacity$ \COMMENT{}
\STATE $size$
\STATE MAXLOADFACTOR $= 0.75$
\STATE Create arrays $bucketkey[0 \ldots capacity-1]$, $bucketvalue[0 \ldots capacity-1]$.
\STATE
\STATE Methods:
\STATE \textsc{HashCode}$(key)$
\STATE \textsc{Hash}$(key)$
\STATE \textsc{Print}$()$
\STATE \textsc{Get}$(key)$
\STATE \textsc{Put}$(\langle key, value \rangle)$
\STATE \textsc{Remove}$(key)$
\STATE \textsc{Resize}$()$
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\begin{algorithmic}
\FUNCTION{Print}{}
\FOR{$i \gets 0$ \TO $capacity - 1$}
\STATE \textbf{print} ``Bucket $i$:''
\IF{$bucketkey[i] = null$}
\STATE \textbf{print} ``NULL''
\ELSIF{$bucketkey[i] = \texttt{DEFUNCT}$}
\STATE \textbf{print} ``DEFUNCT''
\ELSE
\STATE \textbf{print} $\langle bucketkey[i], bucketvalue[i] \rangle$
\ENDIF
\ENDFOR
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\begin{algorithmic}
\FUNCTION{Get}{$key$}
\STATE $index \gets \text{HashCode}(key)$
\WHILE{$bucketkey[index] \neq null$}
\IF{$bucketkey[index] \neq \texttt{DEFUNCT}$ \AND $bucketkey[index] = key$}
\RETURN $bucketvalue[index]$
\ENDIF
\STATE $index \gets (index + 1) \bmod capacity$
\ENDWHILE
\RETURN $null$ \COMMENT{key not found}
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\begin{algorithmic}
\FUNCTION{Put}{$\langle key, value \rangle$}
\STATE $index \gets \text{HashCode}(key)$
\WHILE{$bucketkey[index] \neq null$ \AND $bucketkey[index] \neq \texttt{DEFUNCT}$}
\IF{$bucketkey[index] = key$}
\STATE $bucketvalue[index] \gets value$; \RETURN
\ENDIF
\STATE $index \gets (index + 1) \bmod capacity$
\ENDWHILE
\STATE $bucketkey[index] \gets key$; $bucketvalue[index] \gets value$
\STATE $size \gets size + 1$
\IF{$size / capacity > \texttt{MAXLOADFACTOR}$}
\STATE \CALL{Resize}{}
\ENDIF
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}
\begin{algorithm}
\begin{algorithmic}
\FUNCTION{Remove}{$key$}
\STATE $index \gets \text{HashCode}(key)$
\WHILE{$bucketkey[index] \neq null$}
\IF{$bucketkey[index] \neq \texttt{DEFUNCT}$ \AND $bucketkey[index] = key$}
\STATE $bucketkey[index] \gets \texttt{DEFUNCT}$; $bucketvalue[index] \gets null$
\STATE $size \gets size - 1$; \RETURN true
\ENDIF
\STATE $index \gets (index + 1) \bmod capacity$
\ENDWHILE
\RETURN false
\ENDFUNCTION
\end{algorithmic}
\end{algorithm}