kth smallest element ( or median finding )
The variation on quicksort called quickselect is used here. You do exactlty the same pivot partition except, you keep track of what is the kth for “this” recursion call and only recurse into the part that contains the kth element.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define SIZE 100000
int array[SIZE];
static inline void iota(int* array, int size) {
for (int i = 0; i < size; i++) {
array[i] = i;
}
}
static inline void swap(int* a, int* b) {
int temp = *a;
*a = *b;
*b = temp;
}
static inline void shuffle(int* array, int size) {
for (int i = size - 1; i > 0; i--) {
int j = rand() % (i + 1);
swap(&array[i], &array[j]);
}
}
// k is the INDEX that we want of the sorted array
int qselect(int* array, int start, int end, int k) {
// k must be < size of array, I don't check that
// lomuto partition
int pivot = array[end];
int at = start;
for (int i = start; i < end; i++) {
if (array[i] < pivot) {
swap(&array[at], &array[i]);
at++;
}
}
swap(&array[at], &array[end]);
// pivot is at index 'at'
if (k == at)
return array[at];
else if (k < at)
return qselect(array, start, at - 1, k);
else
return qselect(array, at + 1, end, k);
}
int main() {
srand(time(NULL));
iota(array, SIZE);
shuffle(array, SIZE);
// qselect(i) must be i, also note that the impl modifies the input array, you often don't want that
printf("%d\n", qselect(array, 0, SIZE - 1, 434)); // should print 434
return 0;
}This, like quicksort, can be worst case though there’s a deterministic version that is worst case, albeit with a larger constant factor. Hence the reason why it’s not used in quicksort.
The improvement is in pivot selection, using an approach called “median of medians”. The idea is that you divide the array into groups of 5, find the median of each group and recursively find the median of those medians. This median is guaranteed to be a good pivot giving a theoretical guarantee. The implementation is interesting because the median of medians itself uses quickselect with median of medians.
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define SIZE 100000
int array[SIZE];
static inline void iota(int* array, int size) {
for (int i = 0; i < size; i++) {
array[i] = i;
}
}
static inline void swap(int* a, int* b) {
int temp = *a;
*a = *b;
*b = temp;
}
static inline void shuffle(int* array, int size) {
for (int i = size - 1; i > 0; i--) {
int j = rand() % (i + 1);
swap(&array[i], &array[j]);
}
}
static inline void insertion_sort(int* array, int start, int end) {
for (int i = start + 1; i <= end; i++) {
int key = array[i];
int j = i;
for (; j > start && array[j - 1] > key; j--)
array[j] = array[j - 1];
array[j] = key;
}
}
// k is the INDEX that we want of the sorted array
int qselect_mom(int* array, int start, int end, int k) {
// k must be < size of array, I don't check that
// base case
if (end - start + 1 <= 5) {
insertion_sort(array, start, end);
return array[k];
}
// lomuto partition
int num_medians = (end - start + 1) / 5;
int medians[num_medians]; // on stack
for (int group = 0, group_start = start; group < num_medians; group++, group_start += 5) {
int group_end = group_start + 4;
if (group_end > end) {
group_end = end;
}
insertion_sort(array, group_start, group_end);
medians[group] = array[(group_start + group_end) / 2];
}
// now we have the medians, we can find the median of medians
int pivot = qselect_mom(medians, 0, num_medians - 1, num_medians / 2);
// now we can partition the array around the median of medians
int pivot_index = -1;
for (int i = start; i <= end; i++) {
if (array[i] == pivot) {
pivot_index = i;
break;
}
}
if (pivot_index == -1) {
fprintf(stderr, "Error: pivot not found in the array.\n");
exit(EXIT_FAILURE);
}
swap(&array[pivot_index], &array[end]);
int at = start;
for (int i = start; i < end; i++) {
if (array[i] < pivot) {
swap(&array[at], &array[i]);
at++;
}
}
swap(&array[at], &array[end]);
// pivot is at index 'at'
if (k == at)
return array[at]; // return the index as it's in the input array
else if (k < at)
return qselect_mom(array, start, at - 1, k);
else
return qselect_mom(array, at + 1, end, k);
}
int main() {
srand(time(NULL));
iota(array, SIZE);
shuffle(array, SIZE);
// qselect(i) must be i, also note that the impl modifies the input array, you often don't want that
printf("%d\n", qselect_mom(array, 0, SIZE - 1, 434)); // should print 434
return 0;
}Again, while this is theoretically , the constant factor is much much larger.