author    Bobby Bingham <koorogi@koorogi.info>    2014-06-21 11:24:23 -0500
committer Bobby Bingham <koorogi@koorogi.info>    2014-06-21 11:24:23 -0500
commit    f305d9d704dd76385941414287f5124d8c35d8b5 (patch)
tree      159d4d7abb573572e5c30da787c8c7b2033ec868
parent    5fa0d5023f0e2935f99b04422f3a338d77264107 (diff)
Add glibc's sorts
Also rename musl's sort function for more consistency.
-rw-r--r--  bench.c          2
-rw-r--r--  glibc_msort.c  309
-rw-r--r--  glibc_qsort.c  249
-rw-r--r--  musl_qsort.c     2
-rw-r--r--  sorters.c        4
-rw-r--r--  sorters.h        4
6 files changed, 566 insertions(+), 4 deletions(-)
diff --git a/bench.c b/bench.c
index fb46b94..a51ee99 100644
--- a/bench.c
+++ b/bench.c
@@ -28,7 +28,7 @@ static int compare(const void *a, const void *b)
#define CMP_WIDTH 12
#define MS_WIDTH 6
-#define SORT_WIDTH 10
+#define SORT_WIDTH 16
#define GEN_WIDTH (CMP_WIDTH + MS_WIDTH + 1)
#define SIZE_WIDTH 10
diff --git a/glibc_msort.c b/glibc_msort.c
new file mode 100644
index 0000000..e35f477
--- /dev/null
+++ b/glibc_msort.c
@@ -0,0 +1,309 @@
+/* An alternative to qsort, with an identical interface.
+ This file is part of the GNU C Library.
+ Copyright (C) 1992-2014 Free Software Foundation, Inc.
+ Written by Mike Haertel, September 1988.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Minor modifications made to compile in the qsort_bench tree rather than in the glibc tree. */
+
+#include <alloca.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "sorters.h"
+
+#define __alloca(n) alloca(n)
+#define __sysconf(n) sysconf(n)
+#define atomic_write_barrier() do { } while(0)
+#define __set_errno(e) do { errno = (e); } while(0)
+
+static inline void *__mempcpy(void * restrict dest, const void * restrict src, size_t n)
+{
+ memcpy(dest, src, n);
+ return ((char *) dest) + n;
+}
+
+struct msort_param
+{
+ size_t s;
+ size_t var;
+ cmpfun cmp;
+ char *t;
+};
+
+static void
+msort_with_tmp (const struct msort_param *p, void *b, size_t n)
+{
+ char *b1, *b2;
+ size_t n1, n2;
+
+ if (n <= 1)
+ return;
+
+ n1 = n / 2;
+ n2 = n - n1;
+ b1 = b;
+ b2 = (char *) b + (n1 * p->s);
+
+ msort_with_tmp (p, b1, n1);
+ msort_with_tmp (p, b2, n2);
+
+ char *tmp = p->t;
+ const size_t s = p->s;
+ cmpfun cmp = p->cmp;
+ switch (p->var)
+ {
+ case 0:
+ while (n1 > 0 && n2 > 0)
+ {
+ if ((*cmp) (b1, b2) <= 0)
+ {
+ *(uint32_t *) tmp = *(uint32_t *) b1;
+ b1 += sizeof (uint32_t);
+ --n1;
+ }
+ else
+ {
+ *(uint32_t *) tmp = *(uint32_t *) b2;
+ b2 += sizeof (uint32_t);
+ --n2;
+ }
+ tmp += sizeof (uint32_t);
+ }
+ break;
+ case 1:
+ while (n1 > 0 && n2 > 0)
+ {
+ if ((*cmp) (b1, b2) <= 0)
+ {
+ *(uint64_t *) tmp = *(uint64_t *) b1;
+ b1 += sizeof (uint64_t);
+ --n1;
+ }
+ else
+ {
+ *(uint64_t *) tmp = *(uint64_t *) b2;
+ b2 += sizeof (uint64_t);
+ --n2;
+ }
+ tmp += sizeof (uint64_t);
+ }
+ break;
+ case 2:
+ while (n1 > 0 && n2 > 0)
+ {
+ unsigned long *tmpl = (unsigned long *) tmp;
+ unsigned long *bl;
+
+ tmp += s;
+ if ((*cmp) (b1, b2) <= 0)
+ {
+ bl = (unsigned long *) b1;
+ b1 += s;
+ --n1;
+ }
+ else
+ {
+ bl = (unsigned long *) b2;
+ b2 += s;
+ --n2;
+ }
+ while (tmpl < (unsigned long *) tmp)
+ *tmpl++ = *bl++;
+ }
+ break;
+ case 3:
+ while (n1 > 0 && n2 > 0)
+ {
+ if ((*cmp) (*(const void **) b1, *(const void **) b2) <= 0)
+ {
+ *(void **) tmp = *(void **) b1;
+ b1 += sizeof (void *);
+ --n1;
+ }
+ else
+ {
+ *(void **) tmp = *(void **) b2;
+ b2 += sizeof (void *);
+ --n2;
+ }
+ tmp += sizeof (void *);
+ }
+ break;
+ default:
+ while (n1 > 0 && n2 > 0)
+ {
+ if ((*cmp) (b1, b2) <= 0)
+ {
+ tmp = (char *) __mempcpy (tmp, b1, s);
+ b1 += s;
+ --n1;
+ }
+ else
+ {
+ tmp = (char *) __mempcpy (tmp, b2, s);
+ b2 += s;
+ --n2;
+ }
+ }
+ break;
+ }
+
+ if (n1 > 0)
+ memcpy (tmp, b1, n1 * s);
+ memcpy (b, p->t, (n - n2) * s);
+}
+
+
+void
+glibc_mergesort (void *b, size_t n, size_t s, cmpfun cmp)
+{
+ size_t size = n * s;
+ char *tmp = NULL;
+ struct msort_param p;
+
+ /* For large object sizes use indirect sorting. */
+ if (s > 32)
+ size = 2 * n * sizeof (void *) + s;
+
+ if (size < 1024)
+ /* The temporary array is small, so put it on the stack. */
+ p.t = __alloca (size);
+ else
+ {
+ /* We should avoid allocating too much memory since this might
+ have to be backed up by swap space. */
+ static long int phys_pages;
+ static int pagesize;
+
+ if (pagesize == 0)
+ {
+ phys_pages = __sysconf (_SC_PHYS_PAGES);
+
+ if (phys_pages == -1)
+ /* Error while determining the memory size. So let's
+ assume there is enough memory. Otherwise the
+ implementer should provide a complete implementation of
+ the `sysconf' function. */
+ phys_pages = (long int) (~0ul >> 1);
+
+ /* The following determines that we will never use more than
+ a quarter of the physical memory. */
+ phys_pages /= 4;
+
+ /* Make sure phys_pages is written to memory. */
+ atomic_write_barrier ();
+
+ pagesize = __sysconf (_SC_PAGESIZE);
+ }
+
+ /* Just a comment here. We cannot compute
+ phys_pages * pagesize
+ and compare the needed amount of memory against this value.
+ The problem is that some systems might have more physical
+ memory than can be represented with a `size_t' value (when
+ measured in bytes). */
+
+ /* If the memory requirements are too high don't allocate memory. */
+ if (size / pagesize > (size_t) phys_pages)
+ {
+ glibc_quicksort (b, n, s, cmp);
+ return;
+ }
+
+ /* It's somewhat large, so malloc it. */
+ int save = errno;
+ tmp = malloc (size);
+ __set_errno (save);
+ if (tmp == NULL)
+ {
+ /* Couldn't get space, so use the slower algorithm
+ that doesn't need a temporary array. */
+ glibc_quicksort (b, n, s, cmp);
+ return;
+ }
+ p.t = tmp;
+ }
+
+ p.s = s;
+ p.var = 4;
+ p.cmp = cmp;
+
+ if (s > 32)
+ {
+ /* Indirect sorting. */
+ char *ip = (char *) b;
+ void **tp = (void **) (p.t + n * sizeof (void *));
+ void **t = tp;
+ void *tmp_storage = (void *) (tp + n);
+
+ while ((void *) t < tmp_storage)
+ {
+ *t++ = ip;
+ ip += s;
+ }
+ p.s = sizeof (void *);
+ p.var = 3;
+ msort_with_tmp (&p, p.t + n * sizeof (void *), n);
+
+ /* tp[0] .. tp[n - 1] is now sorted, copy around entries of
+ the original array. Knuth vol. 3 (2nd ed.) exercise 5.2-10. */
+ char *kp;
+ size_t i;
+ for (i = 0, ip = (char *) b; i < n; i++, ip += s)
+ if ((kp = tp[i]) != ip)
+ {
+ size_t j = i;
+ char *jp = ip;
+ memcpy (tmp_storage, ip, s);
+
+ do
+ {
+ size_t k = (kp - (char *) b) / s;
+ tp[j] = jp;
+ memcpy (jp, kp, s);
+ j = k;
+ jp = kp;
+ kp = tp[k];
+ }
+ while (kp != ip);
+
+ tp[j] = jp;
+ memcpy (jp, tmp_storage, s);
+ }
+ }
+ else
+ {
+ if ((s & (sizeof (uint32_t) - 1)) == 0
+ && ((char *) b - (char *) 0) % __alignof__ (uint32_t) == 0)
+ {
+ if (s == sizeof (uint32_t))
+ p.var = 0;
+ else if (s == sizeof (uint64_t)
+ && ((char *) b - (char *) 0) % __alignof__ (uint64_t) == 0)
+ p.var = 1;
+ else if ((s & (sizeof (unsigned long) - 1)) == 0
+ && ((char *) b - (char *) 0)
+ % __alignof__ (unsigned long) == 0)
+ p.var = 2;
+ }
+ msort_with_tmp (&p, b, n);
+ }
+ free (tmp);
+}
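
The indirect-sorting path above only needs the element size to exceed 32 bytes to kick in. A minimal, hypothetical caller (not part of this commit; struct rec and cmp_rec are invented names) that links against glibc_msort.c and exercises that path:

/* Hypothetical standalone caller: the 40-byte element exceeds the
   32-byte cutoff, so glibc_mergesort sorts an array of pointers first
   and then permutes the records in place (Knuth 5.2-10 style). */
#include <stdio.h>
#include <string.h>
#include "sorters.h"

struct rec { char key[40]; };

static int cmp_rec(const void *a, const void *b)
{
	return strcmp(((const struct rec *) a)->key,
	              ((const struct rec *) b)->key);
}

int main(void)
{
	struct rec v[] = { {"pear"}, {"apple"}, {"plum"}, {"fig"} };
	size_t n = sizeof v / sizeof v[0];

	glibc_mergesort(v, n, sizeof v[0], cmp_rec);
	for (size_t i = 0; i < n; i++)
		puts(v[i].key);
	return 0;
}
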
diff --git a/glibc_qsort.c b/glibc_qsort.c
new file mode 100644
index 0000000..ed7baf7
--- /dev/null
+++ b/glibc_qsort.c
@@ -0,0 +1,249 @@
+/* Copyright (C) 1991-2014 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* If you consider tuning this algorithm, you should consult first:
+ Engineering a sort function; Jon Bentley and M. Douglas McIlroy;
+ Software - Practice and Experience; Vol. 23 (11), 1249-1265, 1993. */
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sorters.h"
+
+/* Byte-wise swap two items of size SIZE. */
+#define SWAP(a, b, size) \
+ do \
+ { \
+ size_t __size = (size); \
+ char *__a = (a), *__b = (b); \
+ do \
+ { \
+ char __tmp = *__a; \
+ *__a++ = *__b; \
+ *__b++ = __tmp; \
+ } while (--__size > 0); \
+ } while (0)
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+ This particular magic number was chosen to work best on a Sun 4/260. */
+#define MAX_THRESH 4
+
+/* Stack node declarations used to store unfulfilled partition obligations. */
+typedef struct
+ {
+ char *lo;
+ char *hi;
+ } stack_node;
+
+/* The next 4 #defines implement a very fast in-line stack abstraction. */
+/* The stack needs log (total_elements) entries (we could even subtract
+ log(MAX_THRESH)). Since total_elements has type size_t, we get as
+ upper bound for log (total_elements):
+ bits per byte (CHAR_BIT) * sizeof(size_t). */
+#define STACK_SIZE (CHAR_BIT * sizeof(size_t))
+#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
+#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
+#define STACK_NOT_EMPTY (stack < top)
+
+
+/* Order size using quicksort. This implementation incorporates
+ four optimizations discussed in Sedgewick:
+
+ 1. Non-recursive, using an explicit stack of pointers that store the
+ next array partition to sort. To save time, this maximum amount
+ of space required to store an array of SIZE_MAX is allocated on the
+ stack. Assuming a 32-bit (64 bit) integer for size_t, this needs
+ only 32 * sizeof(stack_node) == 256 bytes (for 64 bit: 1024 bytes).
+ Pretty cheap, actually.
+
+ 2. Choose the pivot element using a median-of-three decision tree.
+ This reduces the probability of selecting a bad pivot value and
+ eliminates certain extraneous comparisons.
+
+ 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
+ insertion sort to order the MAX_THRESH items within each partition.
+ This is a big win, since insertion sort is faster for small, mostly
+ sorted array segments.
+
+ 4. The larger of the two sub-partitions is always pushed onto the
+ stack first, with the algorithm then concentrating on the
+ smaller partition. This *guarantees* no more than log (total_elems)
+ stack size is needed (actually O(1) in this case)! */
+
+void
+glibc_quicksort (void *pbase, size_t total_elems, size_t size, cmpfun cmp)
+{
+ char *base_ptr = (char *) pbase;
+
+ const size_t max_thresh = MAX_THRESH * size;
+
+ if (total_elems == 0)
+ /* Avoid lossage with unsigned arithmetic below. */
+ return;
+
+ if (total_elems > MAX_THRESH)
+ {
+ char *lo = base_ptr;
+ char *hi = &lo[size * (total_elems - 1)];
+ stack_node stack[STACK_SIZE];
+ stack_node *top = stack;
+
+ PUSH (NULL, NULL);
+
+ while (STACK_NOT_EMPTY)
+ {
+ char *left_ptr;
+ char *right_ptr;
+
+ /* Select median value from among LO, MID, and HI. Rearrange
+ LO and HI so the three values are sorted. This lowers the
+ probability of picking a pathological pivot value and
+ skips a comparison for both the LEFT_PTR and RIGHT_PTR in
+ the while loops. */
+
+ char *mid = lo + size * ((hi - lo) / size >> 1);
+
+ if ((*cmp) ((void *) mid, (void *) lo) < 0)
+ SWAP (mid, lo, size);
+ if ((*cmp) ((void *) hi, (void *) mid) < 0)
+ SWAP (mid, hi, size);
+ else
+ goto jump_over;
+ if ((*cmp) ((void *) mid, (void *) lo) < 0)
+ SWAP (mid, lo, size);
+ jump_over:;
+
+ left_ptr = lo + size;
+ right_ptr = hi - size;
+
+ /* Here's the famous ``collapse the walls'' section of quicksort.
+ Gotta like those tight inner loops! They are the main reason
+ that this algorithm runs much faster than others. */
+ do
+ {
+ while ((*cmp) ((void *) left_ptr, (void *) mid) < 0)
+ left_ptr += size;
+
+ while ((*cmp) ((void *) mid, (void *) right_ptr) < 0)
+ right_ptr -= size;
+
+ if (left_ptr < right_ptr)
+ {
+ SWAP (left_ptr, right_ptr, size);
+ if (mid == left_ptr)
+ mid = right_ptr;
+ else if (mid == right_ptr)
+ mid = left_ptr;
+ left_ptr += size;
+ right_ptr -= size;
+ }
+ else if (left_ptr == right_ptr)
+ {
+ left_ptr += size;
+ right_ptr -= size;
+ break;
+ }
+ }
+ while (left_ptr <= right_ptr);
+
+ /* Set up pointers for next iteration. First determine whether
+ left and right partitions are below the threshold size. If so,
+ ignore one or both. Otherwise, push the larger partition's
+ bounds on the stack and continue sorting the smaller one. */
+
+ if ((size_t) (right_ptr - lo) <= max_thresh)
+ {
+ if ((size_t) (hi - left_ptr) <= max_thresh)
+ /* Ignore both small partitions. */
+ POP (lo, hi);
+ else
+ /* Ignore small left partition. */
+ lo = left_ptr;
+ }
+ else if ((size_t) (hi - left_ptr) <= max_thresh)
+ /* Ignore small right partition. */
+ hi = right_ptr;
+ else if ((right_ptr - lo) > (hi - left_ptr))
+ {
+ /* Push larger left partition indices. */
+ PUSH (lo, right_ptr);
+ lo = left_ptr;
+ }
+ else
+ {
+ /* Push larger right partition indices. */
+ PUSH (left_ptr, hi);
+ hi = right_ptr;
+ }
+ }
+ }
+
+ /* Once the BASE_PTR array is partially sorted by quicksort the rest
+ is completely sorted using insertion sort, since this is efficient
+ for partitions below MAX_THRESH size. BASE_PTR points to the beginning
+ of the array to sort, and END_PTR points at the very last element in
+ the array (*not* one beyond it!). */
+
+#define min(x, y) ((x) < (y) ? (x) : (y))
+
+ {
+ char *const end_ptr = &base_ptr[size * (total_elems - 1)];
+ char *tmp_ptr = base_ptr;
+ char *thresh = min(end_ptr, base_ptr + max_thresh);
+ char *run_ptr;
+
+ /* Find smallest element in first threshold and place it at the
+ array's beginning. This is the smallest array element,
+ and the operation speeds up insertion sort's inner loop. */
+
+ for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
+ if ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0)
+ tmp_ptr = run_ptr;
+
+ if (tmp_ptr != base_ptr)
+ SWAP (tmp_ptr, base_ptr, size);
+
+ /* Insertion sort, running from left-hand-side up to right-hand-side. */
+
+ run_ptr = base_ptr + size;
+ while ((run_ptr += size) <= end_ptr)
+ {
+ tmp_ptr = run_ptr - size;
+ while ((*cmp) ((void *) run_ptr, (void *) tmp_ptr) < 0)
+ tmp_ptr -= size;
+
+ tmp_ptr += size;
+ if (tmp_ptr != run_ptr)
+ {
+ char *trav;
+
+ trav = run_ptr + size;
+ while (--trav >= run_ptr)
+ {
+ char c = *trav;
+ char *hi, *lo;
+
+ for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
+ *hi = *lo;
+ *hi = c;
+ }
+ }
+ }
+ }
+}
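
The comment block's fourth point (push the larger partition, keep iterating on the smaller) is what bounds the stack at CHAR_BIT * sizeof(size_t) entries. A stripped-down sketch on plain ints, not taken from glibc (sketch_quicksort is an invented name, and it uses a simple last-element pivot instead of median-of-three), showing just that discipline:

/* Illustrative only: iterative quicksort that pushes the larger partition
   and loops on the smaller one, so the stack never needs more than
   log2(n) <= CHAR_BIT * sizeof(size_t) slots. */
#include <limits.h>
#include <stddef.h>

void sketch_quicksort(int *a, size_t n)
{
	struct { size_t lo, hi; } stack[CHAR_BIT * sizeof(size_t)], *top = stack;
	size_t lo = 0, hi;

	if (n < 2)
		return;
	hi = n - 1;
	for (;;) {
		/* Lomuto partition around the last element. */
		int pivot = a[hi];
		size_t i = lo;
		for (size_t j = lo; j < hi; j++)
			if (a[j] < pivot) {
				int t = a[i]; a[i] = a[j]; a[j] = t;
				i++;
			}
		a[hi] = a[i];
		a[i] = pivot;

		size_t left_n = i - lo;     /* elements below the pivot */
		size_t right_n = hi - i;    /* elements above the pivot */

		if (left_n > right_n) {
			if (left_n > 1) { top->lo = lo; top->hi = i - 1; top++; }
			if (right_n > 1) { lo = i + 1; continue; }
		} else {
			if (right_n > 1) { top->lo = i + 1; top->hi = hi; top++; }
			if (left_n > 1) { hi = i - 1; continue; }
		}
		if (top == stack)
			break;
		--top;
		lo = top->lo;
		hi = top->hi;
	}
}
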
diff --git a/musl_qsort.c b/musl_qsort.c
index 21a39bd..3792aa1 100644
--- a/musl_qsort.c
+++ b/musl_qsort.c
@@ -154,7 +154,7 @@ static void trinkle(unsigned char *head, size_t width, cmpfun cmp, size_t pp[2],
}
}
-void qsort_musl(void *base, size_t nel, size_t width, cmpfun cmp)
+void musl_qsort(void *base, size_t nel, size_t width, cmpfun cmp)
{
size_t lp[12*sizeof(size_t)];
size_t i, size = width * nel;
diff --git a/sorters.c b/sorters.c
index a630136..5150272 100644
--- a/sorters.c
+++ b/sorters.c
@@ -1,6 +1,8 @@
#include "sorters.h"
const struct sorter sorters[] = {
- { .name = "musl", .func = qsort_musl },
+ { .name = "glibc quicksort", .func = glibc_quicksort },
+ { .name = "glibc mergesort", .func = glibc_mergesort },
+ { .name = "musl", .func = musl_qsort },
{ 0 }
};
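
The sorters[] table is the whole integration surface: a new implementation only needs the common signature from sorters.h and one entry here. A hypothetical driver loop (the real bench.c is not shown in this diff; run_all and cmp_int are invented names) that walks the NULL-terminated table and runs every entry on a copy of the same input:

/* Hypothetical driver, not the real bench.c: give each registered sort a
   private copy of the input so all implementations see identical data. */
#include <stdlib.h>
#include <string.h>
#include "sorters.h"

static int cmp_int(const void *a, const void *b)
{
	int x = *(const int *) a, y = *(const int *) b;
	return (x > y) - (x < y);
}

void run_all(const int *input, size_t n)
{
	int *work = malloc(n * sizeof *work);

	if (work == NULL)
		return;
	for (const struct sorter *s = sorters; s->name != NULL; s++) {
		memcpy(work, input, n * sizeof *work);
		s->func(work, n, sizeof *work, cmp_int);
		/* timing and result verification would go here */
	}
	free(work);
}
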
diff --git a/sorters.h b/sorters.h
index 1a2397a..c6850d2 100644
--- a/sorters.h
+++ b/sorters.h
@@ -3,7 +3,9 @@
typedef int (*cmpfun)(const void *, const void *);
typedef void (*sorter)(void *, size_t, size_t, cmpfun);
-void qsort_musl(void *, size_t, size_t, cmpfun);
+void musl_qsort(void *, size_t, size_t, cmpfun);
+void glibc_quicksort(void *, size_t, size_t, cmpfun);
+void glibc_mergesort(void *, size_t, size_t, cmpfun);
extern const struct sorter {
const char *name;