Line 20: | Line 20: | ||
<img src="https://raw.githubusercontent.com/alexeygrigorev/wiki-figures/master/legacy/30s2g8fj552rlsrh1j0q2bjkn8.png" \> | <img src="https://raw.githubusercontent.com/alexeygrigorev/wiki-figures/master/legacy/30s2g8fj552rlsrh1j0q2bjkn8.png" \> | ||
− | == Algorithm == | + | === Algorithm === |
QuickSort(array $A$): | QuickSort(array $A$): | ||
+ | * $n = \text{len}(A)$ | ||
* if $n = 1$ return | * if $n = 1$ return | ||
− | * $p$ = ChosePivot($A | + | * $p$ = ChosePivot($A$) |
* partition $A$ around $p$ | * partition $A$ around $p$ | ||
− | * QuickSort(left from $p$) | + | * QuickSort($A$ left from $p$) |
− | * QuickSort(right from $p$) | + | * QuickSort($A$ right from $p$) |
− | + | ||
− | Partition($A | + | Partition($A$): |
* // input: $A[l..r]$ | * // input: $A[l..r]$ | ||
* $p = A[l]$ | * $p = A[l]$ | ||
Line 36: | Line 36: | ||
** if $A[j] < p$ | ** if $A[j] < p$ | ||
*** swap $j$ and $i$ | *** swap $j$ and $i$ | ||
− | *** $i+ | + | *** $i = i + 1$ |
− | + | ||
* swap $l$ and $i - 1$ | * swap $l$ and $i - 1$ | ||
running time $O(n)$ where $n = r - l + 1$ | running time $O(n)$ where $n = r - l + 1$ | ||
− | == Pivot == | + | === Pivot === |
− | + | Quality | |
* Running time depends on the quality of pivot | * Running time depends on the quality of pivot | ||
* good quality - always divides into 2 equal halves (matches the median) | * good quality - always divides into 2 equal halves (matches the median) | ||
Line 54: | Line 53: | ||
* random | * random | ||
− | == Implementation == | + | === Implementation === |
+ | ==== Java ==== | ||
<pre> | <pre> | ||
public void qsort(int[] input, int left, int right) { | public void qsort(int[] input, int left, int right) { | ||
Line 90: | Line 90: | ||
</pre> | </pre> | ||
− | == | + | ==== Python ==== |
+ | |||
+ | def pivot(A, l, r): | ||
+ | return l | ||
+ | |||
+ | def partition(A, l, r): | ||
+ | pi = pivot(A, l, r) | ||
+ | p = A[pi] | ||
+ | A[l], A[pi] = A[pi], A[l] | ||
+ | |||
+ | i = l | ||
+ | |||
+ | for j in range(i + 1, r): | ||
+ | if A[j] < p: | ||
+ | i = i + 1 | ||
+ | A[i], A[j] = A[j], A[i] | ||
+ | |||
+ | A[l], A[i] = A[i], A[l] | ||
+ | return i | ||
+ | |||
+ | def qs(A, l, r): | ||
+ | n = r - l | ||
+ | |||
+ | if n <= 1: | ||
+ | return | ||
+ | |||
+ | i = partition(A, l, r) | ||
+ | |||
+ | qs(A, l, i) | ||
+ | qs(A, i + 1, r) | ||
+ | |||
+ | def quicksort(A): | ||
+ | return qs(A, 0, len(A)) | ||
+ | |||
+ | == Hoare Partition == | ||
+ | QuickSort implementation from above does not behave well when there repeating elements. | ||
+ | |||
+ | Hoare Partitioning scheme fixes it: | ||
+ | * Idea: for pivot $p$ split array $A$ into two parts: "$\leqslant p$" and "$\geqslant p$" (not "$>p$" and "$<p$" like previously) | ||
+ | * Previously we moved both indices $i$ and $j$ from left to right. Now move $i$ from left, and $j$ from right | ||
+ | |||
+ | HoarePartition($A$, $l$, $r$): | ||
+ | * choose pivot $p$ (e.g. $p = A[l]$) | ||
+ | * $i = l$, $j = r$ | ||
+ | * repeat: | ||
+ | ** while $A[i] < p$, increment $i$ | ||
+ | ** while $A[j] > p$, decrement $j$ | ||
+ | ** if $i \geqslant j$, stop and return $j$ | ||
+ | ** swap $A[i]$ and $A[j]$ | ||
+ | ** increment $i$, decrement $j$ | ||
+ | |||
+ | The QuickSort procedure stays the same | ||
+ | |||
+ | === Implementation === | ||
+ | |||
+ | def hoare_partition(A, l, r): | ||
+ | pi = pivot(A, l, r) | ||
+ | p = A[pi] | ||
+ | |||
+ | i = l | ||
+ | j = r | ||
+ | |||
+ | while True: | ||
+ | while A[i] < p: | ||
+ | i = i + 1 | ||
+ | |||
+ | while A[j] > p: | ||
+ | j = j - 1 | ||
+ | |||
+ | if i >= j: | ||
+ | return j | ||
+ | |||
+ | A[i], A[j] = A[j], A[i] | ||
+ | i = i + 1 | ||
+ | j = j - 1 | ||
+ | |||
+ | |||
+ | def qs(A, l, r): | ||
+ | if l >= r: | ||
+ | return | ||
+ | q = hoare_partition(A, l, r) | ||
+ | qs(A, l, q) | ||
+ | qs(A, q + 1, r) | ||
+ | |||
+ | def quicksort(A): | ||
+ | return qs(A, 0, len(A) - 1) | ||
+ | |||
+ | |||
+ | == $i$th order statistics == | ||
problem | problem | ||
* input: given $i$th element and array $A$ | * input: given $i$th element and array $A$ | ||
Line 131: | Line 219: | ||
* $T(n) = O(n)$ | * $T(n) = O(n)$ | ||
+ | === Implementation === | ||
+ | |||
+ | def r_select(A, l, r, i): | ||
+ | n = r - l | ||
+ | if n == 0: | ||
+ | return A[l] | ||
+ | |||
+ | pi = partition(A, l, r) | ||
+ | |||
+ | if i == pi: | ||
+ | return A[pi] | ||
+ | if pi < i: | ||
+ | return r_select(A, pi + 1, r, i) | ||
+ | else: | ||
+ | return r_select(A, l, pi, i) | ||
+ | |||
+ | def ith_order_statistic(A, i): | ||
+ | return r_select(A.copy(), 0, len(A), i) | ||
+ | |||
+ | If there are repeating elements, we need to use HoarePartition instead | ||
== See also == | == See also == | ||
Line 138: | Line 246: | ||
== Sources == | == Sources == | ||
* [[Algorithms Design and Analysis Part 1 (coursera)]] | * [[Algorithms Design and Analysis Part 1 (coursera)]] | ||
+ | * https://stackoverflow.com/questions/40368543/quicksort-hoares-partitioning-with-duplicate-values | ||
[[Category:Algorithms]] | [[Category:Algorithms]] | ||
[[Category:Sorting]] | [[Category:Sorting]] |
Sorting algorithm based on Divide and Conquer computational paradigm
Advantages:
Main idea - partition around a pivot:
done in $O(n)$ time
partition:
QuickSort(array $A$):
Partition($A$):
running time $O(n)$ where $n = r - l + 1$
Quality
So, possible pivots
public void qsort(int[] input, int left, int right) { int n = right - left; if (n <= 1) { return; } int pivotIndex = partition(input, left, right); qsort(input, left, pivotIndex); qsort(input, pivotIndex + 1, right); } public int partition(int[] input, int left, int right) { int pivot = input[left]; int i = left + 1; for (int j = left + 1; j < right; j++) { if (input[j] < pivot) { swap(input, j, i); i++; } } swap(input, left, i - 1); return i - 1; } public void swap(int[] input, int j, int i) { int tmp = input[j]; input[j] = input[i]; input[i] = tmp; }
def pivot(A, l, r): return l def partition(A, l, r): pi = pivot(A, l, r) p = A[pi] A[l], A[pi] = A[pi], A[l] i = l for j in range(i + 1, r): if A[j] < p: i = i + 1 A[i], A[j] = A[j], A[i] A[l], A[i] = A[i], A[l] return i def qs(A, l, r): n = r - l if n <= 1: return i = partition(A, l, r) qs(A, l, i) qs(A, i + 1, r) def quicksort(A): return qs(A, 0, len(A))
QuickSort implementation from above does not behave well when there repeating elements.
Hoare Partitioning scheme fixes it:
HoarePartition($A$, $l$, $r$):
The QuickSort procedure stays the same
def hoare_partition(A, l, r): pi = pivot(A, l, r) p = A[pi] i = l j = r while True: while A[i] < p: i = i + 1 while A[j] > p: j = j - 1 if i >= j: return j A[i], A[j] = A[j], A[i] i = i + 1 j = j - 1 def qs(A, l, r): if l >= r: return q = hoare_partition(A, l, r) qs(A, l, q) qs(A, q + 1, r) def quicksort(A): return qs(A, 0, len(A) - 1)
problem
Reduction to sorting
modification for QuickSort:
how to find $i$th order?
RSelect(array $A$, len $n$, order $i$)
Best pivot - the median
def r_select(A, l, r, i): n = r - l if n == 0: return A[l] pi = partition(A, l, r) if i == pi: return A[pi] if pi < i: return r_select(A, pi + 1, r, i) else: return r_select(A, l, pi, i) def ith_order_statistic(A, i): return r_select(A.copy(), 0, len(A), i)
If there are repeating elements, we need to use HoarePartition instead