kth smallest element ( or median finding )

The variation on quicksort called quickselect is used here. You do exactlty the same pivot partition except, you keep track of what is the kth for “this” recursion call and only recurse into the part that contains the kth element.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
 
#define SIZE 100000
int array[SIZE];
 
static inline void iota(int* array, int size) {
    for (int i = 0; i < size; i++) {
        array[i] = i;
    }
}
 
static inline void swap(int* a, int* b) {
    int temp = *a;
    *a = *b;
    *b = temp;
}
 
static inline void shuffle(int* array, int size) {
    for (int i = size - 1; i > 0; i--) {
        int j = rand() % (i + 1);
        swap(&array[i], &array[j]);
    }
}
 
// k is the INDEX that we want of the sorted array
int qselect(int* array, int start, int end, int k) {
    // k must be < size of array, I don't check that
 
    // lomuto partition
    int pivot = array[end];
    int at = start;
    for (int i = start; i < end; i++) {
        if (array[i] < pivot) {
            swap(&array[at], &array[i]);
            at++;
        }
    }
    swap(&array[at], &array[end]);
 
    // pivot is at index 'at'
    if (k == at)
        return array[at];
    else if (k < at)
        return qselect(array, start, at - 1, k);
    else
        return qselect(array, at + 1, end, k);
}
 
int main() {
    srand(time(NULL));
    iota(array, SIZE);
    shuffle(array, SIZE);
 
    // qselect(i) must be i, also note that the impl modifies the input array, you often don't want that
    printf("%d\n", qselect(array, 0, SIZE - 1, 434));  // should print 434
 
    return 0;
}

This, like quicksort, can be worst case though there’s a deterministic version that is worst case, albeit with a larger constant factor. Hence the reason why it’s not used in quicksort.

The improvement is in pivot selection, using an approach called “median of medians”. The idea is that you divide the array into groups of 5, find the median of each group and recursively find the median of those medians. This median is guaranteed to be a good pivot giving a theoretical guarantee. The implementation is interesting because the median of medians itself uses quickselect with median of medians.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
 
#define SIZE 100000
int array[SIZE];
 
static inline void iota(int* array, int size) {
    for (int i = 0; i < size; i++) {
        array[i] = i;
    }
}
 
static inline void swap(int* a, int* b) {
    int temp = *a;
    *a = *b;
    *b = temp;
}
 
static inline void shuffle(int* array, int size) {
    for (int i = size - 1; i > 0; i--) {
        int j = rand() % (i + 1);
        swap(&array[i], &array[j]);
    }
}
 
static inline void insertion_sort(int* array, int start, int end) {
    for (int i = start + 1; i <= end; i++) {
        int key = array[i];
        int j = i;
        for (; j > start && array[j - 1] > key; j--)
            array[j] = array[j - 1];
        array[j] = key;
    }
}
 
// k is the INDEX that we want of the sorted array
int qselect_mom(int* array, int start, int end, int k) {
    // k must be < size of array, I don't check that
 
    // base case
    if (end - start + 1 <= 5) {
        insertion_sort(array, start, end);
        return array[k];
    }
 
    // lomuto partition
    int num_medians = (end - start + 1) / 5;
    int medians[num_medians];  // on stack
    for (int group = 0, group_start = start; group < num_medians; group++, group_start += 5) {
        int group_end = group_start + 4;
        if (group_end > end) {
            group_end = end;
        }
 
        insertion_sort(array, group_start, group_end);
        medians[group] = array[(group_start + group_end) / 2];
    }
    // now we have the medians, we can find the median of medians
    int pivot = qselect_mom(medians, 0, num_medians - 1, num_medians / 2);
    // now we can partition the array around the median of medians
 
    int pivot_index = -1;
    for (int i = start; i <= end; i++) {
        if (array[i] == pivot) {
            pivot_index = i;
            break;
        }
    }
    if (pivot_index == -1) {
        fprintf(stderr, "Error: pivot not found in the array.\n");
        exit(EXIT_FAILURE);
    }
    swap(&array[pivot_index], &array[end]);
    int at = start;
    for (int i = start; i < end; i++) {
        if (array[i] < pivot) {
            swap(&array[at], &array[i]);
            at++;
        }
    }
    swap(&array[at], &array[end]);
 
    // pivot is at index 'at'
    if (k == at)
        return array[at];  // return the index as it's in the input array
    else if (k < at)
        return qselect_mom(array, start, at - 1, k);
    else
        return qselect_mom(array, at + 1, end, k);
}
 
int main() {
    srand(time(NULL));
    iota(array, SIZE);
    shuffle(array, SIZE);
 
    // qselect(i) must be i, also note that the impl modifies the input array, you often don't want that
    printf("%d\n", qselect_mom(array, 0, SIZE - 1, 434));  // should print 434
 
    return 0;
}

Again, while this is theoretically , the constant factor is much much larger.

Majority Element