/* grailsort.c */

#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <limits.h>

#include "sorters.h"
#include "common.h"
#include "counts.h"

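/* address of the last element of an array of nmel elements */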
static inline void *tail(char *base, size_t nmel, size_t width)
{
	return base + (nmel-1) * width;
}

/* get index of the last block whose head is less than the previous block's
 * tail; returns 0 if there is no such block */
static size_t last_overlap(char *base, size_t bcount, size_t bwidth, size_t width, cmpfun cmp)
{
	struct counts snapshot = counts[CURRENT];

	for (char *cur = tail(base, bcount, bwidth); --bcount; cur -= bwidth)
		if (cmp(cur - width, cur) > 0) break;

	add_counts(counts + LAST_OVERLAP, &snapshot);
	return bcount;
}

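/* merge the sorted run of anmel elements at a with the sorted run of bnmel
 * elements at b, writing the result to buf; b must start exactly anmel
 * elements past buf, and the a run must sit outside the output region, so
 * every swap below exchanges non-overlapping ranges; the anmel elements
 * that originally occupied [buf, b) end up in a's positions, in no
 * particular order */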
void merge_preswapped(char *buf, char *a, char *b, size_t anmel, size_t bnmel, size_t width, cmpfun cmp)
{
	while (anmel && bnmel) {
		size_t count;

		/* move b elements smaller than *a to the output, at most anmel at
		 * a time so the swapped regions cannot overlap; the do-while
		 * repeats whenever the cap was hit */
		do {
			for (count = 0; bnmel && count < anmel && cmp(a, b + count*width) > 0; count++) bnmel--;
			swap(buf, b, count * width);
			b   += count * width;
			buf += count * width;
		} while (count == anmel);

		/* move a elements not greater than *b to the output */
		for (count = 0; anmel && cmp(a + count*width, b) <= 0; count++) anmel--;
		swap(buf, a, count * width);
		a   += count * width;
		buf += count * width;
	}

	swap(buf, a, anmel * width); /* any leftover a elements go at the end */
}

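/* merge the adjacent sorted runs a (anmel elements) and b (bnmel elements),
 * using buf as swap space for up to anmel elements; the last element of a
 * must compare greater than the first element of b (the caller guarantees
 * this via last_overlap); returns the number of leading a elements that
 * were already in place and left untouched */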
size_t merge(char *buf, char *a, char *b, size_t anmel, size_t bnmel, size_t width, cmpfun cmp)
{
	struct counts snapshot = counts[CURRENT];

	/* skip leading a elements already <= *b; they are in their final place */
	size_t skipa;
	for (skipa = 0; cmp(a, b) <= 0; skipa++) a += width;
	anmel -= skipa;

	swap(buf, a, anmel * width);
	merge_preswapped(a, buf, b, anmel, bnmel, width, cmp);

	add_counts(counts + MERGE, &snapshot);
	return skipa;
}

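/* grail sort: carve off a buffer of bufnmel elements, split the rest into
 * sqrt(nmel)-sized blocks, recursively sort the left part (buffer plus a
 * blocks) and the right part (b blocks), sort all blocks by their head
 * elements, then merge block by block using the buffer as swap space and
 * sort the buffer last.
 *
 * Usage sketch, assuming cmpfun (declared in the project headers) matches
 * the standard qsort() comparator signature:
 *
 *     static int intcmp(const void *x, const void *y)
 *     {
 *         int a = *(const int *)x, b = *(const int *)y;
 *         return (a > b) - (a < b);
 *     }
 *
 *     int v[] = { 3, 1, 4, 1, 5, 9, 2, 6 };
 *     grailsort(v, sizeof v / sizeof *v, sizeof *v, intcmp);
 */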
void grailsort(void *unsorted, size_t nmel, size_t width, cmpfun cmp)
{
	char *base = unsorted;

	if (nmel <= MAX_SORTNET) {
		sorting_network(base, nmel, width, cmp);
		return;
	}

	size_t blknmel = sqrt(nmel);               /* elements in a block */
	size_t bufnmel = blknmel + nmel % blknmel; /* elements in the buffer */
	size_t bwidth  = blknmel * width;          /* size of a block in bytes */
	size_t blocks  = nmel / blknmel - 1;       /* number of blocks in a + b */
	size_t acount  = blocks / 2;
	size_t bcount  = blocks - acount;

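	/* layout: [ buffer: bufnmel ][ a: acount blocks ][ b: bcount blocks ] */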
	char *buf = base;
	char *a   = buf + bufnmel * width;
	char *b   = a   + acount * bwidth;

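	/* sort the two halves; the buffer is sorted along with the a blocks */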
	grailsort(buf, acount * blknmel + bufnmel, width, cmp);
	grailsort(b,   bcount * blknmel,           width, cmp);

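	/* if the left half's largest element is <= the right half's smallest,
	 * the whole array is already sorted */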
	struct counts snapshot = counts[CURRENT];
	int in_order = cmp(tail(a, acount*blknmel, width), b) <= 0;
	add_counts(counts + CHECK_SORTED, &snapshot);
	if (in_order) return;

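	/* if the left half's smallest element is >= the right half's largest,
	 * a single rotation puts the two halves in order */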
	snapshot = counts[CURRENT];
	int reversed = cmp(base, tail(b, bcount*blknmel, width)) >= 0;
	if (reversed) {
		rotate(base, nmel*width, acount*bwidth + bufnmel*width);
		add_counts(counts + CHECK_SORTED, &snapshot);
		return;
	}
	add_counts(counts + CHECK_SORTED, &snapshot);

	/* steal some small elements from b for the buffer, so the buffer holds
	 * the bufnmel smallest elements: count how many of those smallest come
	 * from the buffer (usebuf) and how many from b (stealb) */
	size_t usebuf = 0, stealb = 0;
	snapshot = counts[CURRENT];
	do {
		if (cmp(buf + usebuf*width, b + stealb*width) <= 0) {
			usebuf++;
		} else {
			stealb++;
		}
	} while (stealb < bcount*blknmel && usebuf + stealb < bufnmel);
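	/* merge the displaced buffer tail into b; the swapping leaves the
	 * stealb smallest elements of b in the buffer, in no particular order */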
	if (stealb) {
		merge_preswapped(b, a - stealb*width, b + stealb*width, stealb, bcount*blknmel - stealb, width, cmp);
	}
	add_counts(counts + STEAL_BUF, &snapshot);

	/* sort all the a and b blocks together by their head elements */
	grailsort(a, blocks, bwidth, cmp);

	/* merge, starting from the end and working towards the beginning: find
	 * the last block whose head falls below its predecessor's tail and
	 * merge that predecessor into the sorted run that follows it */
	size_t last_nmel = blknmel; /* unmerged elements in the last block */
	while (blocks > 1) {
		size_t overlap = last_overlap(a, blocks, bwidth, width, cmp);

		if (!overlap) break;

		char *block_a = a + bwidth * (overlap-1);
		char *block_b = block_a + bwidth;
		size_t bnmel  = (blocks - overlap - 1) * blknmel + last_nmel;

		last_nmel = merge(buf, block_a, block_b, blknmel, bnmel, width, cmp);
		blocks    = overlap;
	}

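	/* the buffer still holds the bufnmel smallest elements, so sorting it
	 * in place completes the sort */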
	grailsort(buf, bufnmel, width, cmp);

#if 0
	assert_sorted(base, nmel, width, cmp);
#endif
}