author     Bobby Bingham <koorogi@koorogi.info>  2014-08-18 22:59:59 -0500
committer  Bobby Bingham <koorogi@koorogi.info>  2014-08-18 22:59:59 -0500
commit     77801ff0202e96fa3a22bdfbdb6a82641f3b179b (patch)
tree       73fde1ba04e03efad517b8918fe1de8a8f1df2ad
parent     706da1805c0bad2a4e6f1928b7eda003f02a0ac0 (diff)
Optimize rotate
Previously, we used three reversals. This was simple, but was aggressively
inlined by gcc at higher optimization levels, massively bloating the code.

Now, we take the front of the array, which is to be rotated to the end, and,
using a series of swaps, move it progressively closer to the end. When we can
no longer perform a swap of the required size because we're too close to the
end, we view the remaining operation as a rotate to the right with a smaller
shift size. We repeat as necessary.

This generates somewhat smaller code than the three reversals, even at -Os.
It also shows up to a 25% overall performance improvement for grailsort on
some inputs.
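For illustration, a self-contained sketch of this scheme: the rotate body
mirrors the patch below, swap is a simplified byte-wise version, and the main
harness with its sample buffer is purely illustrative, not part of the patch.

#include <stddef.h>
#include <stdio.h>

/* byte-wise swap of two equal-sized, non-overlapping regions */
static void swap(char *a, char *b, size_t width)
{
	while (width--) {
		char tmp = *a;
		*a++ = *b;
		*b++ = tmp;
	}
}

/* rotate `size` bytes left by `shift` bytes, as in the patch below */
static void rotate(char *base, size_t size, size_t shift)
{
	int dir = 1;
	while (shift) {
		/* march the shift-sized front block toward the far end,
		 * one swap at a time */
		while (2*shift <= size) {
			swap(base, base + dir*shift, shift);
			size -= shift;
			base += shift*dir;
		}
		/* too close to the end for another full swap: the leftover
		 * is a smaller rotate in the opposite direction */
		shift = size - shift;
		base = dir > 0 ? base + size - shift : base - shift;
		dir *= -1;
	}
}

int main(void)
{
	char buf[] = "abcdefg";
	rotate(buf, 7, 3);
	puts(buf); /* prints "defgabc": left-rotated by 3 */
	return 0;
}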
-rw-r--r--  common.h    28
-rw-r--r--  wikisort.c   4
2 files changed, 15 insertions, 17 deletions
diff --git a/common.h b/common.h
index c1bfb6c..087ee4f 100644
--- a/common.h
+++ b/common.h
@@ -28,22 +28,20 @@ static void swap(char *a, char *b, size_t width)
}
}
-static void reverse(char *base, size_t nmel, size_t width)
-{
- char *last = base + (nmel - 1) * width;
- while (last > base) {
- swap(base, last, width);
- base += width;
- last -= width;
- }
-}
-
/* rotates left */
-static void rotate(char *base, size_t nmel, size_t width, size_t shift)
+static void rotate(char *base, size_t size, size_t shift)
{
- reverse(base, shift, width);
- reverse(base + shift * width, nmel - shift, width);
- reverse(base, nmel, width);
+ int dir = 1;
+ while (shift) {
+ while (2*shift <= size) {
+ swap(base, base + dir*shift, shift);
+ size -= shift;
+ base += shift*dir;
+ }
+ shift = size - shift;
+ base = dir > 0 ? base + size - shift : base - shift;
+ dir *= -1;
+ }
}
static void distribute_buffer(char *base, size_t bufnmel, size_t sortnmel, size_t width, cmpfun cmp)
@@ -55,7 +53,7 @@ static void distribute_buffer(char *base, size_t bufnmel, size_t sortnmel, size_
for (; insertpos < sortnmel && cmp(base, sorted + insertpos * width) > 0; insertpos++);
if (insertpos > 0) {
- rotate(base, bufnmel + insertpos, width, bufnmel);
+ rotate(base, (bufnmel + insertpos) * width, bufnmel * width);
}
base += (insertpos + 1) * width;
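Note the signature change at the call sites: rotate now takes byte counts
(size, shift) rather than an element count plus width. A hypothetical
before/after call, with the array and sizes purely illustrative:

int a[10];
/* old interface: rotate(base, nmel, width, shift_in_elements) */
rotate((char *)a, 10, sizeof(int), 4);
/* new interface: the caller pre-multiplies by the element width */
rotate((char *)a, 10 * sizeof(int), 4 * sizeof(int));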
diff --git a/wikisort.c b/wikisort.c
index b04b3ff..65af111 100644
--- a/wikisort.c
+++ b/wikisort.c
@@ -85,7 +85,7 @@ void wikisort(void *unsorted, size_t nmel, size_t width, cmpfun cmp)
/* if blocks in b are all less than those in a, just need a rotate */
if (cmp(base + (nmel - 1) * width, base) <= 0) {
- rotate(base, nmel, width, a.nmel * bnmel + buf.nmel);
+ rotate(base, nmel * width, (a.nmel * bnmel + buf.nmel) * width);
return;
}
@@ -102,7 +102,7 @@ void wikisort(void *unsorted, size_t nmel, size_t width, cmpfun cmp)
pendb_nmel += rolled_blocks * bnmel;
if (pendb_nmel) {
size_t offset = pendb_nmel - binary_search(a.base, a.base - pendb_nmel * width, pendb_nmel, width, cmp);
- rotate(a.base - offset * width, bnmel + offset, width, offset);
+ rotate(a.base - offset * width, (bnmel + offset) * width, offset * width);
pendb_nmel -= offset;
if (penda.nmel && pendb_nmel) {