another broken qsort

This commit is contained in:
Jan (janneke) Nieuwenhuizen 2022-10-25 09:26:27 +02:00
parent d5cb2ae8cb
commit 386341092e
No known key found for this signature in database
GPG Key ID: F3C1A0D9C1D65273
1 changed files with 119 additions and 224 deletions

View File

@ -18,244 +18,139 @@
* along with GNU Mes. If not, see <http://www.gnu.org/licenses/>. * along with GNU Mes. If not, see <http://www.gnu.org/licenses/>.
*/ */
/* Copyright (C) 1991, 1992 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Douglas C. Schmidt (schmidt@ics.uci.edu).
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 675 Mass Ave,
Cambridge, MA 02139, USA. */
#include <alloca.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
/* Byte-wise swap two items of size SIZE. */ #if 0
#define SWAP(a, b, size) \
do \
{ \
size_t __size = (size); \
char *__a = (a), *__b = (b); \
do \
{ \
char __tmp = *__a; \
*__a++ = *__b; \
*__b++ = __tmp; \
} while (--__size > 0); \
} while (0)
/* Discontinue quicksort algorithm when partition gets below this size.
This particular magic number was chosen to work best on a Sun 4/260. */
#define MAX_THRESH 4
/* Stack node declarations used to store unfulfilled partition obligations. */
typedef struct
{
char *lo;
char *hi;
} stack_node;
/* The next 4 #defines implement a very fast in-line stack abstraction. */
#define STACK_SIZE (8 * sizeof(unsigned long int))
#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
#define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi)))
#define STACK_NOT_EMPTY (stack < top)
/* Order size using quicksort. This implementation incorporates
four optimizations discussed in Sedgewick:
1. Non-recursive, using an explicit stack of pointer that store the
next array partition to sort. To save time, this maximum amount
of space required to store an array of MAX_INT is allocated on the
stack. Assuming a 32-bit integer, this needs only 32 *
sizeof(stack_node) == 136 bits. Pretty cheap, actually.
2. Chose the pivot element using a median-of-three decision tree.
This reduces the probability of selecting a bad pivot value and
eliminates certain extraneous comparisons.
3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving
insertion sort to order the MAX_THRESH items within each partition.
This is a big win, since insertion sort is faster for small, mostly
sorted array segements.
4. The larger of the two sub-partitions is always pushed onto the
stack first, with the algorithm then concentrating on the
smaller partition. This *guarantees* no more than log (n)
stack size is needed (actually O(1) in this case)! */
void void
qsort (void* base, size_t total_elems, size_t size, qswap (void *a, void *b, size_t size)
int (*compare)(void const *, void const *))
{ {
char *base_ptr = (char *) base; char *pa = a;
char *pb = b;
/* Allocating SIZE bytes for a pivot buffer facilitates a better
algorithm below since we can do comparisons directly on the pivot. */
char *pivot_buffer = (char *) alloca (size);
const size_t max_thresh = MAX_THRESH * size;
if (total_elems == 0)
/* Avoid lossage with unsigned arithmetic below. */
return;
if (total_elems > MAX_THRESH)
{
char *lo = base_ptr;
char *hi = &lo[size * (total_elems - 1)];
/* Largest size needed for 32-bit int!!! */
stack_node stack[STACK_SIZE];
stack_node *top = stack + 1;
while (STACK_NOT_EMPTY)
{
char *left_ptr;
char *right_ptr;
char *pivot = pivot_buffer;
/* Select median value from among LO, MID, and HI. Rearrange
LO and HI so the three values are sorted. This lowers the
probability of picking a pathological pivot value and
skips a comparison for both the LEFT_PTR and RIGHT_PTR. */
char *mid = lo + size * ((hi - lo) / size >> 1);
if ((*compare)((void*) mid, (void*) lo) < 0)
SWAP (mid, lo, size);
if ((*compare)((void*) hi, (void*) mid) < 0)
SWAP (mid, hi, size);
else
goto jump_over;
if ((*compare)((void*) mid, (void*) lo) < 0)
SWAP (mid, lo, size);
jump_over:;
memcpy (pivot, mid, size);
pivot = pivot_buffer;
left_ptr = lo + size;
right_ptr = hi - size;
/* Here's the famous ``collapse the walls'' section of quicksort.
Gotta like those tight inner loops! They are the main reason
that this algorithm runs much faster than others. */
do do
{ {
while ((*compare)((void*) left_ptr, (void*) pivot) < 0) char tmp = *pa;
left_ptr += size; *pa++ = *pb;
*pb++ = tmp;
while ((*compare)((void*) pivot, (void*) right_ptr) < 0) } while (--size > 0);
right_ptr -= size; }
#else
if (left_ptr < right_ptr) void
qswap (void *a, void *b, int size)
{ {
SWAP (left_ptr, right_ptr, size); char buffer[size];
left_ptr += size; memcpy (buffer, a, size);
right_ptr -= size; memcpy (a, b, size);
memcpy (b, buffer, size);
} }
else if (left_ptr == right_ptr) #endif
/**
* Assuming precondition (P) that `end - begin >= 2`, this function reorders the elements
* of range [begin, end) and returns a pointer `ret` such that the following
* postconditions hold:
* - (Q1): `ret > begin`
* - (Q2): `ret < end`
* and, for some value `p` in [begin, end):
* - (Q3): all values in [begin, ret) are lower than or equal to `p`
* - (Q4): all values in [ret, end) are greater than or equal to `p`
*/
char *
qpart (char *low, char *high, size_t size,
int (*compare) (void const *, void const *))
{ {
left_ptr += size; char *pivot = (low + (high - low)/2);
right_ptr -= size;
break;
}
}
while (left_ptr <= right_ptr);
/* Set up pointers for next iteration. First determine whether // Loop invariants, all trivially verified at the start of the loop:
left and right partitions are below the threshold size. If so, // - (A): values strictly to the left of `low` are lower than or equal to `pivot`
ignore one or both. Otherwise, push the larger partition's // - (B): there is at least one value at or to the right of `low` that is greater
bounds on the stack and continue sorting the smaller one. */ // than or equal to `pivot`
// - (C): values at or to the right of `high` are greater than or equal to `pivot`
if ((size_t) (right_ptr - lo) <= max_thresh) // - (D): there is at least one value strictly to the left of `high` that is lower
// than or equal to `pivot`
// - (E): `low <= high`
//
// The loop terminates because `high - low` decreases strictly at each execution of
// the body (obvious).
while (1)
{ {
if ((size_t) (hi - left_ptr) <= max_thresh) // This loop terminates because of (B).
/* Ignore both small partitions. */ int c = compare (low, pivot);
POP (lo, hi); while (c < 0)
else low += size;
/* Ignore small left partition. */
lo = left_ptr; // Here, we have
} // - (1): `*low >= pivot`
else if ((size_t) (hi - left_ptr) <= max_thresh) // - (2): `low <= high` because of (E) and (C)
/* Ignore small right partition. */ // - properties (A) and (B) still hold because `low` has only moved
hi = right_ptr; // past values strictly less than `pivot`
else if ((right_ptr - lo) > (hi - left_ptr))
// This loop terminates because of (D).
do {
--high;
int c = compare (pivot, high);
} while (c < 0);
// Here, we have
// - (3): `*high <= pivot`
// - (4): by (C) which held before this loop, elements strictly to the
// right of `high` are known to be greater than or equal to `pivot`
// (but now (C) may not hold anymore)
if (low >= high)
{ {
/* Push larger left partition indices. */ // Due to (1), (A) and (4), (Q3) and (Q4) are established with `pivot`
PUSH( lo, right_ptr); // as `p`.
lo = left_ptr; // Clearly, (B) proves Q2.
// See the rest of the answer below for a proof of (Q1).
// This correctly finishes the qpart.
return low;
} }
else
// We have `low < high` and we swap...
qswap (low, high, size);
// ...and now,
// - by (1) and (4), invariant (C) is re-established
// - by (1), invariant (D) is re-established
// - (5): by (3), `*low <= pivot`
++low;
// (A) already held before this increment. Thus, because of (5), (A)
// still holds. Additionally, by (1), after the swap, (B) is
// re-established. Finally, (E) is obvious.
}
}
void
_qsort (char *low, char *high, size_t size,
int (*compare) (void const *, void const *))
{ {
/* Push larger right partition indices. */ // Trivial base case...
PUSH (left_ptr, hi); if (low - high < size)
hi = right_ptr; return;
}
} // ...therefore pre-condition (P) of `qpart` is satisfied.
char *p = qpart (low, high, size, compare);
// Thanks to postconditions (Q1) and (Q2) of `qpart`, the ranges
// [low, p) and [p, high) are non-empty, therefore the size of the ranges
// passed to the recursive calls below is strictly lower than the size of
// [low, high) in this call. Therefore the base case is eventually reached
// and the algorithm terminates.
// Thanks to postconditions (Q3) and (Q4) of `qpart`, and by induction
// on the size of [low, high), the recursive calls below sort their
// respective argument ranges and [low, high) is sorted as a result.
_qsort (low, p, size, compare);
_qsort (p, high, size, compare);
} }
/* Once the BASE_void* array is partially sorted by quicksort the rest void
is completely sorted using insertion sort, since this is efficient qsort (void *base, size_t count, size_t size,
for partitions below MAX_THRESH size. BASE_void* points to the beginning int (*compare) (void const *, void const *))
of the array to sort, and END_void* points at the very last element in
the array (*not* one beyond it!). */
#define min(x, y) ((x) < (y) ? (x) : (y))
{ {
char *const end_ptr = &base_ptr[size * (total_elems - 1)]; char *high = base + count * size;
char *tmp_ptr = base_ptr; _qsort (base, high, size, compare);
char *thresh = min(end_ptr, base_ptr + max_thresh);
char *run_ptr;
/* Find smallest element in first threshold and place it at the
array's beginning. This is the smallest array element,
and the operation speeds up insertion sort's inner loop. */
for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size)
if ((*compare)((void*) run_ptr, (void*) tmp_ptr) < 0)
tmp_ptr = run_ptr;
if (tmp_ptr != base_ptr)
SWAP (tmp_ptr, base_ptr, size);
/* Insertion sort, running from left-hand-side up to right-hand-side. */
run_ptr = base_ptr + size;
while ((run_ptr += size) <= end_ptr)
{
tmp_ptr = run_ptr - size;
while ((*compare)((void*) run_ptr, (void*) tmp_ptr) < 0)
tmp_ptr -= size;
tmp_ptr += size;
if (tmp_ptr != run_ptr)
{
char *trav;
trav = run_ptr + size;
while (--trav >= run_ptr)
{
char c = *trav;
char *hi, *lo;
for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo)
*hi = *lo;
*hi = c;
}
}
}
}
} }