#include <math.h>
#include <stddef.h>
#include <stdint.h>

#include "sorters.h"
#include "common.h"

/*
 * address of the last element in a run of nmel elements, each width bytes
 * wide, that starts at base. nmel must be at least 1: with nmel == 0 the
 * (nmel) - 1 term wraps around for unsigned counts.
 */
#define TAIL(base,nmel,width) ((base) + ((nmel) - 1) * (width))

/*
 * scan bcount consecutive blocks of bwidth bytes backwards, looking for a
 * block whose first element sorts before the element immediately preceding
 * it (i.e. the last element of the block in front of it).
 *
 * returns the index of the highest such block, or 0 when no block boundary
 * is out of order. block 0 has no predecessor and is never examined.
 * callers are expected to pass bcount >= 1.
 */
static size_t last_overlap(char *base, size_t bcount, size_t bwidth, size_t width, cmpfun cmp)
{
	size_t idx  = bcount - 1;
	char  *head = base + idx * bwidth; /* first element of block idx */

	while (idx != 0) {
		/* head - width is the tail element of block idx - 1 */
		if (cmp(head - width, head) > 0)
			return idx;
		head -= bwidth;
		idx--;
	}
	return 0;
}

/*
 * merge two adjacent sorted runs in place, using buf as scratch space.
 *
 * run a is the anmel elements starting at base; run b is the bnmel elements
 * that follow it directly. elements are moved with swap() only, so whatever
 * was stored in buf is exchanged with, rather than overwritten by, elements
 * of a (assuming swap() exchanges the two regions it is given). buf must
 * therefore have room for up to anmel elements.
 *
 * ties are resolved in favour of run a.
 *
 * returns the number of leading elements of a that already compared <= the
 * first element of b and were therefore left where they were.
 */
size_t merge(char *buf, char *base, size_t anmel, size_t bnmel, size_t width, cmpfun cmp)
{
	char  *left    = buf;                  /* next unmerged element of a, once parked in buf */
	char  *right   = base + anmel * width; /* next unmerged element of b */
	char  *out     = base;                 /* next output slot */
	size_t skipped = 0;

	/* the prefix of a that is already in its final position needs no work */
	while (anmel && bnmel && cmp(out, right) <= 0) {
		out += width;
		skipped++;
		anmel--;
	}

	/* park what is left of a in the scratch buffer */
	swap(out, left, anmel * width);

	/* classic two-way merge; each step swaps the winner into the output slot */
	for (; anmel > 0 && bnmel > 0; out += width) {
		if (cmp(left, right) > 0) {
			swap(out, right, width);
			right += width;
			bnmel--;
		} else {
			swap(out, left, width);
			left += width;
			anmel--;
		}
	}

	/* leftovers of a go back behind the merged output; leftovers of b are already in place */
	swap(out, left, anmel * width);
	return skipped;
}

/*
 * sort nmel elements of width bytes each, starting at unsorted, ordered by cmp.
 *
 * inputs of at most MAX_SORTNET elements are handed to sorting_network().
 * larger inputs are split into a buffer at the front followed by equally
 * sized blocks of about sqrt(nmel) elements:
 *
 *   [ buffer: bufnmel elements ][ a: acount blocks ][ b: bcount blocks ]
 *
 * a and b are sorted recursively. if they interleave, their blocks are
 * sorted by head element and then merged from the back towards the front,
 * with the buffer serving as scratch space for merge(). finally the buffer
 * itself is sorted and handed to distribute_buffer() together with the
 * sorted remainder (presumably to merge the two - confirm against its
 * definition).
 */
void grailsort(void *unsorted, size_t nmel, size_t width, cmpfun cmp)
{
	char *base = unsorted;

	if (nmel <= MAX_SORTNET) {
		sorting_network(base, nmel, width, cmp);
		return;
	}

	size_t blknmel = sqrt(nmel);               /* elements in a block */
	size_t bufnmel = blknmel + nmel % blknmel; /* elements in the buffer */
	size_t bwidth  = blknmel * width;          /* size of a block in bytes */
	size_t blocks  = nmel / blknmel - 1;       /* number of blocks in a + b */
	size_t acount  = blocks / 2;
	size_t bcount  = blocks - acount;

	char *a = base + bufnmel * width;
	char *b = a    + acount  * bwidth;

	grailsort(a, acount * blknmel, width, cmp);
	grailsort(b, bcount * blknmel, width, cmp);

	/* a and b only need merging if the tail of a sorts after the head of b */
	if (cmp(TAIL(a, acount * blknmel, width), b) > 0) {
		/*
		 * sort all the a and b blocks together by their head elements:
		 * each block is treated as one element of bwidth bytes, and cmp
		 * is handed pointers to the start of each block
		 */
		grailsort(a, blocks, bwidth, cmp);

		/* merge, starting from the end and working towards the beginning */
		while (blocks > 1) {
			size_t overlap = last_overlap(a, blocks, bwidth, width, cmp);
			if (overlap == 0)
				break;

			/*
			 * merge the block just before the out-of-order boundary
			 * with everything from that boundary up to the current
			 * end; merge()'s return value is not needed here
			 */
			merge(base, TAIL(a, overlap, bwidth), blknmel, (blocks - overlap) * blknmel, width, cmp);
			blocks = overlap;
		}
	}

	grailsort(base, bufnmel, width, cmp);
	distribute_buffer(base, bufnmel, nmel - bufnmel, width, cmp);
}