1/*
2 * Copyright (c) 2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. Please obtain a copy of the License at
10 * http://www.opensource.apple.com/apsl/ and read it before using this
11 * file.
12 *
13 * The Original Code and all software distributed under the License are
14 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
15 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
16 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
18 * Please see the License for the specific language governing rights and
19 * limitations under the License.
20 *
21 * @APPLE_LICENSE_HEADER_END@
22 */
23
24/*	CFSortFunctions.c
25	Copyright (c) 1999-2013, Apple Inc. All rights reserved.
26	Responsibility: Christopher Kane
27*/
28
29#include <CoreFoundation/CFBase.h>
30#include "CFInternal.h"
31#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
32#include <dispatch/dispatch.h>
33#endif
34#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED
35#include <dispatch/private.h>
36#endif
37
38enum {
39    kCFSortConcurrent = (1 << 0),
40    kCFSortStable = (1 << 4),
41};
42
43typedef CFIndex VALUE_TYPE;
44typedef CFIndex INDEX_TYPE;
45typedef CFComparisonResult CMP_RESULT_TYPE;
46typedef CMP_RESULT_TYPE (^COMPARATOR_BLOCK)(VALUE_TYPE, VALUE_TYPE);
47
48/*
49Number of elements in a list and expected number of compares,
50when the initial short-circuiting compare is not done.
511	0
522	1
533	2.667
544	4.667
555	7.167
566	9.833
577	12.733
588	15.733
599	19.167
6010	22.667
6111	26.2857
6212	29.9524
63*/
64
65static void __CFSimpleMerge(VALUE_TYPE listp[], INDEX_TYPE cnt1, INDEX_TYPE cnt2, VALUE_TYPE tmp[], COMPARATOR_BLOCK cmp) {
66    if (cnt1 <= 0 || cnt2 <= 0) return;
67    // if the last element of listp1 <= the first of listp2, lists are already ordered
68    if (16 < cnt1 + cnt2 && cmp(listp[cnt1 - 1], listp[cnt1]) <= 0) return;
69
70    INDEX_TYPE idx = 0, idx1 = 0, idx2 = cnt1;
71    for (;;) {
72        if (cnt1 <= idx1) {
73            while (idx--) {
74                listp[idx] = tmp[idx];
75            }
76            return;
77        }
78        if (cnt1 + cnt2 <= idx2) {
79            for (INDEX_TYPE t = cnt1 + cnt2 - 1; idx <= t; t--) {
80                listp[t] = listp[t - cnt2];
81            }
82            while (idx--) {
83                listp[idx] = tmp[idx];
84            }
85            return;
86        }
87        VALUE_TYPE v1 = listp[idx1], v2 = listp[idx2];
88        if (cmp(v1, v2) <= 0) {
89            tmp[idx] = v1;
90            idx1++;
91        } else {
92            tmp[idx] = v2;
93            idx2++;
94        }
95        idx++;
96    }
97}
98
99static void __CFSimpleMergeSort(VALUE_TYPE listp[], INDEX_TYPE cnt, VALUE_TYPE tmp[], COMPARATOR_BLOCK cmp) {
100    if (cnt < 2) {
101        /* do nothing */
102    } else if (2 == cnt) {
103        VALUE_TYPE v0 = listp[0], v1 = listp[1];
104        if (0 < cmp(v0, v1)) {
105            listp[0] = v1;
106            listp[1] = v0;
107        }
108    } else if (3 == cnt) {
109        VALUE_TYPE v0 = listp[0], v1 = listp[1], v2 = listp[2], vt;
110        if (0 < cmp(v0, v1)) {
111            vt = v0;
112            v0 = v1;
113            v1 = vt;
114        }
115        if (0 < cmp(v1, v2)) {
116            vt = v1;
117            v1 = v2;
118            v2 = vt;
119            if (0 < cmp(v0, v1)) {
120                vt = v0;
121                v0 = v1;
122                v1 = vt;
123            }
124        }
125        listp[0] = v0;
126        listp[1] = v1;
127        listp[2] = v2;
128    } else {
129        INDEX_TYPE half_cnt = cnt / 2;
130        __CFSimpleMergeSort(listp, half_cnt, tmp, cmp);
131        __CFSimpleMergeSort(listp + half_cnt, cnt - half_cnt, tmp, cmp);
132        __CFSimpleMerge(listp, half_cnt, cnt - half_cnt, tmp, cmp);
133    }
134}
135
136#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
137// Excluded from linux for dispatch dependency
138
139// if !right, put the cnt1 smallest values in tmp, else put the cnt2 largest values in tmp
140static void __CFSortIndexesNMerge(VALUE_TYPE listp1[], INDEX_TYPE cnt1, VALUE_TYPE listp2[], INDEX_TYPE cnt2, VALUE_TYPE tmp[], size_t right, COMPARATOR_BLOCK cmp) {
141    // if the last element of listp1 <= the first of listp2, lists are already ordered
142    if (16 < cnt1 + cnt2 && cmp(listp1[cnt1 - 1], listp2[0]) <= 0) {
143        memmove(tmp, (right ? listp2 : listp1), (right ? cnt2 : cnt1) * sizeof(VALUE_TYPE));
144        return;
145    }
146
147    if (right) {
148        VALUE_TYPE *listp1_end = listp1;
149        VALUE_TYPE *listp2_end = listp2;
150        VALUE_TYPE *tmp_end = tmp;
151        listp1 += cnt1 - 1;
152        listp2 += cnt2 - 1;
153        tmp += cnt2;
154        while (tmp_end < tmp) {
155            tmp--;
156            if (listp2 < listp2_end) {
157                listp1--;
158                *tmp = *listp1;
159            } else if (listp1 < listp1_end) {
160                listp2--;
161                *tmp = *listp2;
162            } else {
163                VALUE_TYPE v1 = *listp1, v2 = *listp2;
164                CMP_RESULT_TYPE res = cmp(v1, v2);
165                if (res <= 0) {
166                    *tmp = v2;
167                    listp2--;
168                } else {
169                    *tmp = v1;
170                    listp1--;
171                }
172            }
173        }
174    } else {
175        VALUE_TYPE *listp1_end = listp1 + cnt1;
176        VALUE_TYPE *listp2_end = listp2 + cnt2;
177        VALUE_TYPE *tmp_end = tmp + cnt1;
178        while (tmp < tmp_end) {
179            if (listp2_end <= listp2) {
180                *tmp = *listp1;
181                listp1++;
182            } else if (listp1_end <= listp1) {
183                *tmp = *listp2;
184                listp2++;
185            } else {
186                VALUE_TYPE v1 = *listp1, v2 = *listp2;
187                CMP_RESULT_TYPE res = cmp(v1, v2);
188                if (res <= 0) {
189                    *tmp = v1;
190                    listp1++;
191                } else {
192                    *tmp = v2;
193                    listp2++;
194                }
195            }
196            tmp++;
197        }
198    }
199}
200
201/* Merging algorithm based on
202    "A New Parallel Sorting Algorithm based on Odd-Even Mergesort", Ezequiel Herruzo, et al
203*/
204static void __CFSortIndexesN(VALUE_TYPE listp[], INDEX_TYPE count, int32_t ncores, CMP_RESULT_TYPE (^cmp)(INDEX_TYPE, INDEX_TYPE)) {
205    /* Divide the array up into up to ncores, multiple-of-16-sized, chunks */
206    INDEX_TYPE sz = ((((count + ncores - 1) / ncores) + 15) / 16) * 16;
207    INDEX_TYPE num_sect = (count + sz - 1) / sz;
208    INDEX_TYPE last_sect_len = count + sz - sz * num_sect;
209
210    STACK_BUFFER_DECL(VALUE_TYPE *, stack_tmps, num_sect);
211    for (INDEX_TYPE idx = 0; idx < num_sect; idx++) {
212        stack_tmps[idx] = (VALUE_TYPE *)malloc(sz * sizeof(VALUE_TYPE));
213    }
214    VALUE_TYPE **tmps = stack_tmps;
215
216    dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, DISPATCH_QUEUE_OVERCOMMIT);
217    dispatch_apply(num_sect, q, ^(size_t sect) {
218            INDEX_TYPE sect_len = (sect < num_sect - 1) ? sz : last_sect_len;
219            __CFSimpleMergeSort(listp + sect * sz, sect_len, tmps[sect], cmp); // naturally stable
220        });
221
222    INDEX_TYPE even_phase_cnt = ((num_sect / 2) * 2);
223    INDEX_TYPE odd_phase_cnt = (((num_sect - 1) / 2) * 2);
224    for (INDEX_TYPE idx = 0; idx < (num_sect + 1) / 2; idx++) {
225        dispatch_apply(even_phase_cnt, q, ^(size_t sect) { // merge even
226                size_t right = sect & (size_t)0x1;
227                VALUE_TYPE *left_base = listp + sect * sz - (right ? sz : 0);
228                VALUE_TYPE *right_base = listp + sect * sz + (right ? 0 : sz);
229                INDEX_TYPE sect2_len = (sect + 1 + (right ? 0 : 1) == num_sect) ? last_sect_len : sz;
230                __CFSortIndexesNMerge(left_base, sz, right_base, sect2_len, tmps[sect], right, cmp);
231            });
232        if (num_sect & 0x1) {
233            memmove(tmps[num_sect - 1], listp + (num_sect - 1) * sz, last_sect_len * sizeof(VALUE_TYPE));
234        }
235        dispatch_apply(odd_phase_cnt, q, ^(size_t sect) { // merge odd
236                size_t right = sect & (size_t)0x1;
237                VALUE_TYPE *left_base = tmps[sect + (right ? 0 : 1)];
238                VALUE_TYPE *right_base = tmps[sect + (right ? 1 : 2)];
239                INDEX_TYPE sect2_len = (sect + 1 + (right ? 1 : 2) == num_sect) ? last_sect_len : sz;
240                __CFSortIndexesNMerge(left_base, sz, right_base, sect2_len, listp + sect * sz + sz, right, cmp);
241            });
242        memmove(listp + 0 * sz, tmps[0], sz * sizeof(VALUE_TYPE));
243        if (!(num_sect & 0x1)) {
244            memmove(listp + (num_sect - 1) * sz, tmps[num_sect - 1], last_sect_len * sizeof(VALUE_TYPE));
245        }
246    }
247
248    for (INDEX_TYPE idx = 0; idx < num_sect; idx++) {
249        free(stack_tmps[idx]);
250    }
251}
252#endif
253
254// fills an array of indexes (of length count) giving the indexes 0 - count-1, as sorted by the comparator block
255void CFSortIndexes(CFIndex *indexBuffer, CFIndex count, CFOptionFlags opts, CFComparisonResult (^cmp)(CFIndex, CFIndex)) {
256    if (count < 1) return;
257    if (INTPTR_MAX / sizeof(CFIndex) < count) return;
258    int32_t ncores = 0;
259    if (opts & kCFSortConcurrent) {
260        ncores = __CFActiveProcessorCount();
261        if (count < 160 || ncores < 2) {
262            opts = (opts & ~kCFSortConcurrent);
263        } else if (count < 640 && 2 < ncores) {
264            ncores = 2;
265        } else if (count < 3200 && 4 < ncores) {
266            ncores = 4;
267        } else if (count < 16000 && 8 < ncores) {
268            ncores = 8;
269        }
270        if (16 < ncores) {
271            ncores = 16;
272        }
273    }
274#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
275    if (count <= 65536) {
276        for (CFIndex idx = 0; idx < count; idx++) indexBuffer[idx] = idx;
277    } else {
278        /* Specifically hard-coded to 8; the count has to be very large before more chunks and/or cores is worthwhile. */
279        CFIndex sz = ((((size_t)count + 15) / 16) * 16) / 8;
280        dispatch_apply(8, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, DISPATCH_QUEUE_OVERCOMMIT), ^(size_t n) {
281                CFIndex idx = n * sz, lim = __CFMin(idx + sz, count);
282                for (; idx < lim; idx++) indexBuffer[idx] = idx;
283            });
284    }
285#else
286    for (CFIndex idx = 0; idx < count; idx++) indexBuffer[idx] = idx;
287#endif
288#if DEPLOYMENT_TARGET_MACOSX || DEPLOYMENT_TARGET_EMBEDDED || DEPLOYMENT_TARGET_WINDOWS
289    if (opts & kCFSortConcurrent) {
290        __CFSortIndexesN(indexBuffer, count, ncores, cmp); // naturally stable
291        return;
292    }
293#endif
294    STACK_BUFFER_DECL(VALUE_TYPE, local, count <= 4096 ? count : 1);
295    VALUE_TYPE *tmp = (count <= 4096) ? local : (VALUE_TYPE *)malloc(count * sizeof(VALUE_TYPE));
296    __CFSimpleMergeSort(indexBuffer, count, tmp, cmp); // naturally stable
297    if (local != tmp) free(tmp);
298}
299
300/* Comparator is passed the address of the values. */
301void CFQSortArray(void *list, CFIndex count, CFIndex elementSize, CFComparatorFunction comparator, void *context) {
302    if (count < 2 || elementSize < 1) return;
303    STACK_BUFFER_DECL(CFIndex, locali, count <= 4096 ? count : 1);
304    CFIndex *indexes = (count <= 4096) ? locali : (CFIndex *)malloc(count * sizeof(CFIndex));
305    CFSortIndexes(indexes, count, 0, ^(CFIndex a, CFIndex b) { return comparator((char *)list + a * elementSize, (char *)list + b * elementSize, context); });
306    STACK_BUFFER_DECL(uint8_t, locals, count <= (16 * 1024 / elementSize) ? count * elementSize : 1);
307    void *store = (count <= (16 * 1024 / elementSize)) ? locals : malloc(count * elementSize);
308    for (CFIndex idx = 0; idx < count; idx++) {
309        if (sizeof(uintptr_t) == elementSize) {
310            uintptr_t *a = (uintptr_t *)list + indexes[idx];
311            uintptr_t *b = (uintptr_t *)store + idx;
312            *b = *a;
313        } else {
314            memmove((char *)store + idx * elementSize, (char *)list + indexes[idx] * elementSize, elementSize);
315        }
316    }
317    // no swapping or modification of the original list has occurred until this point
318    objc_memmove_collectable(list, store, count * elementSize);
319    if (locals != store) free(store);
320    if (locali != indexes) free(indexes);
321}
322
323/* Comparator is passed the address of the values. */
324void CFMergeSortArray(void *list, CFIndex count, CFIndex elementSize, CFComparatorFunction comparator, void *context) {
325    if (count < 2 || elementSize < 1) return;
326    STACK_BUFFER_DECL(CFIndex, locali, count <= 4096 ? count : 1);
327    CFIndex *indexes = (count <= 4096) ? locali : (CFIndex *)malloc(count * sizeof(CFIndex));
328    CFSortIndexes(indexes, count, kCFSortStable, ^(CFIndex a, CFIndex b) { return comparator((char *)list + a * elementSize, (char *)list + b * elementSize, context); });
329    STACK_BUFFER_DECL(uint8_t, locals, count <= (16 * 1024 / elementSize) ? count * elementSize : 1);
330    void *store = (count <= (16 * 1024 / elementSize)) ? locals : malloc(count * elementSize);
331    for (CFIndex idx = 0; idx < count; idx++) {
332        if (sizeof(uintptr_t) == elementSize) {
333            uintptr_t *a = (uintptr_t *)list + indexes[idx];
334            uintptr_t *b = (uintptr_t *)store + idx;
335            *b = *a;
336        } else {
337            memmove((char *)store + idx * elementSize, (char *)list + indexes[idx] * elementSize, elementSize);
338        }
339    }
340    // no swapping or modification of the original list has occurred until this point
341    objc_memmove_collectable(list, store, count * elementSize);
342    if (locals != store) free(store);
343    if (locali != indexes) free(indexes);
344}
345
346
347