1/* 2 * The code of this file was taken from http://jeffreystedfast.blogspot.be, 3 * where it was posted in 2011 by Jeffrey Stedfast under the MIT license. 4 * The MIT license text is as follows: 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25#include <errno.h> 26#include <string.h> 27#include <stdlib.h> 28#include <isl_sort.h> 29 30#define MID(lo, hi) (lo + ((hi - lo) >> 1)) 31 32/* The code here is an optimized merge sort. Starting from a generic merge sort 33 * the following optimizations were applied: 34 * 35 * o Batching of memcpy() calls: Instead of calling memcpy() to copy each and 36 * every element into a temporary buffer, blocks of elements are copied 37 * at a time. 38 * 39 * o To reduce the number of memcpy() calls further, copying leading 40 * and trailing elements into our temporary buffer is avoided, in case it is 41 * not necessary to merge them. 42 * 43 * A further optimization could be to specialize memcpy calls based on the 44 * size of the types we compare. For now, this code does not include the 45 * relevant optimization, as clang e.g. inlines a very efficient memcpy() 46 * implementation. It is not clear, that the specialized version as provided in 47 * the blog post, is really superior to the one that will be inlined by 48 * default. So we decided to keep the code simple until this optimization was 49 * proven to be beneficial. 50 */ 51 52static void 53msort (void *array, void *buf, size_t low, size_t high, size_t size, 54 int (* compare) (const void *, const void *, void *), void *arg) 55{ 56 char *a1, *al, *am, *ah, *ls, *hs, *lo, *hi, *b; 57 size_t copied = 0; 58 size_t mid; 59 60 mid = MID (low, high); 61 62 if (mid + 1 < high) 63 msort (array, buf, mid + 1, high, size, compare, arg); 64 65 if (mid > low) 66 msort (array, buf, low, mid, size, compare, arg); 67 68 ah = ((char *) array) + ((high + 1) * size); 69 am = ((char *) array) + ((mid + 1) * size); 70 a1 = al = ((char *) array) + (low * size); 71 72 b = (char *) buf; 73 lo = al; 74 hi = am; 75 76 do { 77 ls = lo; 78 hs = hi; 79 80 if (lo > al || hi > am) { 81 /* our last loop already compared lo & hi and found lo <= hi */ 82 lo += size; 83 } 84 85 while (lo < am && compare (lo, hi, arg) <= 0) 86 lo += size; 87 88 if (lo < am) { 89 if (copied == 0) { 90 /* avoid copying the leading items */ 91 a1 = lo; 92 ls = lo; 93 } 94 95 /* our last compare tells us hi < lo */ 96 hi += size; 97 98 while (hi < ah && compare (hi, lo, arg) < 0) 99 hi += size; 100 101 if (lo > ls) { 102 memcpy (b, ls, lo - ls); 103 copied += (lo - ls); 104 b += (lo - ls); 105 } 106 107 memcpy (b, hs, hi - hs); 108 copied += (hi - hs); 109 b += (hi - hs); 110 } else if (copied) { 111 memcpy (b, ls, lo - ls); 112 copied += (lo - ls); 113 b += (lo - ls); 114 115 /* copy everything we needed to re-order back into array */ 116 memcpy (a1, buf, copied); 117 return; 118 } else { 119 /* everything already in order */ 120 return; 121 } 122 } while (hi < ah); 123 124 if (lo < am) { 125 memcpy (b, lo, am - lo); 126 copied += (am - lo); 127 } 128 129 memcpy (a1, buf, copied); 130} 131 132static int 133MergeSort (void *base, size_t nmemb, size_t size, 134 int (* compare) (const void *, const void *, void *), void *arg) 135{ 136 void *tmp; 137 138 if (nmemb < 2) 139 return 0; 140 141 if (!(tmp = malloc (nmemb * size))) { 142 errno = ENOMEM; 143 return -1; 144 } 145 146 msort (base, tmp, 0, nmemb - 1, size, compare, arg); 147 148 free (tmp); 149 150 return 0; 151} 152 153int isl_sort(void *const pbase, size_t total_elems, size_t size, 154 int (*cmp)(const void *, const void *, void *arg), void *arg) 155{ 156 return MergeSort (pbase, total_elems, size, cmp, arg); 157} 158