file.c revision 281535
1170530Ssam/*-
2178354Ssam * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
3170530Ssam * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
4170530Ssam * All rights reserved.
5170530Ssam *
6170530Ssam * Redistribution and use in source and binary forms, with or without
7170530Ssam * modification, are permitted provided that the following conditions
8170530Ssam * are met:
9170530Ssam * 1. Redistributions of source code must retain the above copyright
10170530Ssam *    notice, this list of conditions and the following disclaimer.
11170530Ssam * 2. Redistributions in binary form must reproduce the above copyright
12170530Ssam *    notice, this list of conditions and the following disclaimer in the
13170530Ssam *    documentation and/or other materials provided with the distribution.
14170530Ssam *
15170530Ssam * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16170530Ssam * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17170530Ssam * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18170530Ssam * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19170530Ssam * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20170530Ssam * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21170530Ssam * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22170530Ssam * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23170530Ssam * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24170530Ssam * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25170530Ssam * SUCH DAMAGE.
26170530Ssam */
27170530Ssam
28170530Ssam#include <sys/cdefs.h>
29170530Ssam__FBSDID("$FreeBSD: stable/10/usr.bin/sort/file.c 281535 2015-04-14 18:57:50Z pfg $");
30170530Ssam
31170530Ssam#include <sys/mman.h>
32170530Ssam#include <sys/stat.h>
33170530Ssam#include <sys/types.h>
34170530Ssam#include <sys/queue.h>
35170530Ssam
36178354Ssam#include <err.h>
37170530Ssam#include <fcntl.h>
38170530Ssam#if defined(SORT_THREADS)
39170530Ssam#include <pthread.h>
40170530Ssam#endif
41170530Ssam#include <semaphore.h>
42170530Ssam#include <stdio.h>
43170530Ssam#include <stdlib.h>
44170530Ssam#include <string.h>
45170530Ssam#include <unistd.h>
46170530Ssam#include <wchar.h>
47170530Ssam#include <wctype.h>
48170530Ssam
49170530Ssam#include "coll.h"
50178354Ssam#include "file.h"
51170530Ssam#include "radixsort.h"
52170530Ssam
53170530Ssamunsigned long long free_memory = 1000000;
54170530Ssamunsigned long long available_free_memory = 1000000;
55170530Ssam
56178354Ssambool use_mmap;
57178354Ssam
58178354Ssamconst char *tmpdir = "/var/tmp";
59178354Ssamconst char *compress_program;
60178354Ssam
61178354Ssamsize_t max_open_files = 16;
62178354Ssam
63178354Ssam/*
64178354Ssam * How much space we read from file at once
65178354Ssam */
66178354Ssam#define READ_CHUNK (4096)
67178354Ssam
68178354Ssam/*
69178354Ssam * File reader structure
70178354Ssam */
71178354Ssamstruct file_reader
72178354Ssam{
73170530Ssam	struct reader_buffer	 rb;
74170530Ssam	FILE			*file;
75170530Ssam	char			*fname;
76170530Ssam	unsigned char		*buffer;
77170530Ssam	unsigned char		*mmapaddr;
78170530Ssam	unsigned char		*mmapptr;
79170530Ssam	size_t			 bsz;
80170530Ssam	size_t			 cbsz;
81173273Ssam	size_t			 mmapsize;
82173273Ssam	size_t			 strbeg;
83173273Ssam	int			 fd;
84173273Ssam	char			 elsymb;
85173273Ssam};
86178354Ssam
87178354Ssam/*
88178354Ssam * Structure to be used in file merge process.
89184280Ssam */
90184280Ssamstruct file_header
91173273Ssam{
92178354Ssam	struct file_reader		*fr;
93178354Ssam	struct sort_list_item		*si; /* current top line */
94178354Ssam	size_t				 file_pos;
95178354Ssam};
96178354Ssam
97178354Ssam/*
98178354Ssam * List elements of "cleanable" files list.
99178354Ssam */
100178354Ssamstruct CLEANABLE_FILE
101178354Ssam{
102178354Ssam	char				*fn;
103184280Ssam	LIST_ENTRY(CLEANABLE_FILE)	 files;
104178354Ssam};
105178354Ssam
106170530Ssam/*
107178354Ssam * List header of "cleanable" files list.
108178354Ssam */
109170530Ssamstatic LIST_HEAD(CLEANABLE_FILES,CLEANABLE_FILE) tmp_files;
110170530Ssam
111170530Ssam/*
112170530Ssam * Semaphore to protect the tmp file list.
113170530Ssam * We use semaphore here because it is signal-safe, according to POSIX.
114170530Ssam * And semaphore does not require pthread library.
115170530Ssam */
116170530Ssamstatic sem_t tmp_files_sem;
117170530Ssam
118170530Ssamstatic void mt_sort(struct sort_list *list,
119184280Ssam    int (*sort_func)(void *, size_t, size_t,
120184280Ssam    int (*)(const void *, const void *)), const char* fn);
121184280Ssam
122184280Ssam/*
123191552Ssam * Init tmp files list
124191552Ssam */
125191552Ssamvoid
126170530Ssaminit_tmp_files(void)
127170530Ssam{
128170530Ssam
129170530Ssam	LIST_INIT(&tmp_files);
130170530Ssam	sem_init(&tmp_files_sem, 0, 1);
131170530Ssam}
132170530Ssam
133178354Ssam/*
134170530Ssam * Save name of a tmp file for signal cleanup
135170530Ssam */
136170530Ssamvoid
137184280Ssamtmp_file_atexit(const char *tmp_file)
138191552Ssam{
139191552Ssam
140170530Ssam	if (tmp_file) {
141173273Ssam		sem_wait(&tmp_files_sem);
142173273Ssam		struct CLEANABLE_FILE *item =
143178354Ssam		    sort_malloc(sizeof(struct CLEANABLE_FILE));
144173273Ssam		item->fn = sort_strdup(tmp_file);
145178354Ssam		LIST_INSERT_HEAD(&tmp_files, item, files);
146178354Ssam		sem_post(&tmp_files_sem);
147178354Ssam	}
148178354Ssam}
149173273Ssam
150178354Ssam/*
151178354Ssam * Clear tmp files
152178354Ssam */
153178354Ssamvoid
154178354Ssamclear_tmp_files(void)
155178354Ssam{
156178354Ssam	struct CLEANABLE_FILE *item;
157178354Ssam
158178354Ssam	sem_wait(&tmp_files_sem);
159178354Ssam	LIST_FOREACH(item,&tmp_files,files) {
160178354Ssam		if ((item) && (item->fn))
161178354Ssam			unlink(item->fn);
162178354Ssam	}
163178354Ssam	sem_post(&tmp_files_sem);
164178354Ssam}
165178354Ssam
166170530Ssam/*
167173273Ssam * Check whether a file is a temporary file
168173273Ssam */
169170530Ssamstatic bool
170170530Ssamfile_is_tmp(const char* fn)
171178354Ssam{
172178354Ssam	struct CLEANABLE_FILE *item;
173178354Ssam	bool ret = false;
174178354Ssam
175178354Ssam	if (fn) {
176173273Ssam		sem_wait(&tmp_files_sem);
177178354Ssam		LIST_FOREACH(item,&tmp_files,files) {
178178354Ssam			if ((item) && (item->fn))
179178354Ssam				if (strcmp(item->fn, fn) == 0) {
180178354Ssam					ret = true;
181170530Ssam					break;
182183256Ssam				}
183183256Ssam		}
184183256Ssam		sem_post(&tmp_files_sem);
185183256Ssam	}
186170530Ssam
187178354Ssam	return (ret);
188178354Ssam}
189178354Ssam
190178354Ssam/*
191178354Ssam * Generate new temporary file name
192178354Ssam */
193170530Ssamchar *
194178354Ssamnew_tmp_file_name(void)
195178354Ssam{
196178354Ssam	static size_t tfcounter = 0;
197170530Ssam	static const char *fn = ".bsdsort.";
198170530Ssam	char *ret;
199170530Ssam	size_t sz;
200178354Ssam
201170530Ssam	sz = strlen(tmpdir) + 1 + strlen(fn) + 32 + 1;
202170530Ssam	ret = sort_malloc(sz);
203170530Ssam
204170530Ssam	sprintf(ret, "%s/%s%d.%lu", tmpdir, fn, (int) getpid(), (unsigned long)(tfcounter++));
205170530Ssam	tmp_file_atexit(ret);
206170530Ssam	return (ret);
207170530Ssam}
208170530Ssam
209170530Ssam/*
210170530Ssam * Initialize file list
211170530Ssam */
212170530Ssamvoid
213172226Ssamfile_list_init(struct file_list *fl, bool tmp)
214172226Ssam{
215170530Ssam
216170530Ssam	if (fl) {
217178354Ssam		fl->count = 0;
218170530Ssam		fl->sz = 0;
219170530Ssam		fl->fns = NULL;
220170530Ssam		fl->tmp = tmp;
221170530Ssam	}
222170530Ssam}
223170530Ssam
224170530Ssam/*
225170530Ssam * Add a file name to the list
226170530Ssam */
227170530Ssamvoid
228170530Ssamfile_list_add(struct file_list *fl, char *fn, bool allocate)
229170530Ssam{
230170530Ssam
231170530Ssam	if (fl && fn) {
232170530Ssam		if (fl->count >= fl->sz || (fl->fns == NULL)) {
233170530Ssam			fl->sz = (fl->sz) * 2 + 1;
234170530Ssam			fl->fns = sort_realloc(fl->fns, fl->sz *
235170530Ssam			    sizeof(char *));
236170530Ssam		}
237173273Ssam		fl->fns[fl->count] = allocate ? sort_strdup(fn) : fn;
238170530Ssam		fl->count += 1;
239170530Ssam	}
240170530Ssam}
241170530Ssam
242170530Ssam/*
243170530Ssam * Populate file list from array of file names
244170530Ssam */
245170530Ssamvoid
246170530Ssamfile_list_populate(struct file_list *fl, int argc, char **argv, bool allocate)
247170530Ssam{
248170530Ssam
249170530Ssam	if (fl && argv) {
250170530Ssam		int i;
251170530Ssam
252178354Ssam		for (i = 0; i < argc; i++)
253173462Ssam			file_list_add(fl, argv[i], allocate);
254170530Ssam	}
255170530Ssam}
256170530Ssam
257170530Ssam/*
258170530Ssam * Clean file list data and delete the files,
259178354Ssam * if this is a list of temporary files
260170530Ssam */
261170530Ssamvoid
262170530Ssamfile_list_clean(struct file_list *fl)
263170530Ssam{
264170530Ssam
265170530Ssam	if (fl) {
266170530Ssam		if (fl->fns) {
267170530Ssam			size_t i;
268170530Ssam
269170530Ssam			for (i = 0; i < fl->count; i++) {
270178354Ssam				if (fl->fns[i]) {
271173462Ssam					if (fl->tmp)
272178354Ssam						unlink(fl->fns[i]);
273170530Ssam					sort_free(fl->fns[i]);
274170530Ssam					fl->fns[i] = 0;
275173462Ssam				}
276170530Ssam			}
277170530Ssam			sort_free(fl->fns);
278170530Ssam			fl->fns = NULL;
279178354Ssam		}
280170530Ssam		fl->sz = 0;
281170530Ssam		fl->count = 0;
282178354Ssam		fl->tmp = false;
283170530Ssam	}
284170530Ssam}
285170530Ssam
286178354Ssam/*
287170530Ssam * Init sort list
288170530Ssam */
289170530Ssamvoid
290170530Ssamsort_list_init(struct sort_list *l)
291170530Ssam{
292170530Ssam
293170530Ssam	if (l) {
294170530Ssam		l->count = 0;
295170530Ssam		l->size = 0;
296170530Ssam		l->memsize = sizeof(struct sort_list);
297170530Ssam		l->list = NULL;
298170530Ssam	}
299170530Ssam}
300170530Ssam
301170530Ssam/*
302170530Ssam * Add string to sort list
303170530Ssam */
304170530Ssamvoid
305170530Ssamsort_list_add(struct sort_list *l, struct bwstring *str)
306170530Ssam{
307170530Ssam
308170530Ssam	if (l && str) {
309170530Ssam		size_t indx = l->count;
310170530Ssam
311170530Ssam		if ((l->list == NULL) || (indx >= l->size)) {
312170530Ssam			size_t newsize = (l->size + 1) + 1024;
313170530Ssam
314170530Ssam			l->list = sort_realloc(l->list,
315170530Ssam			    sizeof(struct sort_list_item*) * newsize);
316170530Ssam			l->memsize += (newsize - l->size) *
317170530Ssam			    sizeof(struct sort_list_item*);
318170530Ssam			l->size = newsize;
319170530Ssam		}
320170530Ssam		l->list[indx] = sort_list_item_alloc();
321170530Ssam		sort_list_item_set(l->list[indx], str);
322170530Ssam		l->memsize += sort_list_item_size(l->list[indx]);
323178354Ssam		l->count += 1;
324178354Ssam	}
325191552Ssam}
326191552Ssam
327191552Ssam/*
328178354Ssam * Clean sort list data
329191552Ssam */
330191552Ssamvoid
331178354Ssamsort_list_clean(struct sort_list *l)
332178354Ssam{
333178354Ssam
334178354Ssam	if (l) {
335178354Ssam		if (l->list) {
336178354Ssam			size_t i;
337178354Ssam
338178354Ssam			for (i = 0; i < l->count; i++) {
339178354Ssam				struct sort_list_item *item;
340178354Ssam
341191552Ssam				item = l->list[i];
342178354Ssam
343191552Ssam				if (item) {
344191552Ssam					sort_list_item_clean(item);
345178354Ssam					sort_free(item);
346178354Ssam					l->list[i] = NULL;
347178354Ssam				}
348170530Ssam			}
349170530Ssam			sort_free(l->list);
350170530Ssam			l->list = NULL;
351191552Ssam		}
352170530Ssam		l->count = 0;
353170530Ssam		l->size = 0;
354178354Ssam		l->memsize = sizeof(struct sort_list);
355170530Ssam	}
356170530Ssam}
357170530Ssam
358170530Ssam/*
359170530Ssam * Write sort list to file
360183247Ssam */
361170530Ssamvoid
362170530Ssamsort_list_dump(struct sort_list *l, const char *fn)
363170530Ssam{
364170530Ssam
365170530Ssam	if (l && fn) {
366183247Ssam		FILE *f;
367183247Ssam
368178354Ssam		f = openfile(fn, "w");
369170530Ssam		if (f == NULL)
370170530Ssam			err(2, NULL);
371170530Ssam
372170530Ssam		if (l->list) {
373170530Ssam			size_t i;
374170530Ssam			if (!(sort_opts_vals.uflag)) {
375170530Ssam				for (i = 0; i < l->count; ++i)
376170530Ssam					bwsfwrite(l->list[i]->str, f,
377170530Ssam					    sort_opts_vals.zflag);
378170530Ssam			} else {
379170530Ssam				struct sort_list_item *last_printed_item = NULL;
380170530Ssam				struct sort_list_item *item;
381170530Ssam				for (i = 0; i < l->count; ++i) {
382178354Ssam					item = l->list[i];
383170530Ssam					if ((last_printed_item == NULL) ||
384170530Ssam					    list_coll(&last_printed_item, &item)) {
385170530Ssam						bwsfwrite(item->str, f, sort_opts_vals.zflag);
386170530Ssam						last_printed_item = item;
387170530Ssam					}
388170530Ssam				}
389170530Ssam			}
390170530Ssam		}
391170530Ssam
392170530Ssam		closefile(f, fn);
393170530Ssam	}
394170530Ssam}
395170530Ssam
396170530Ssam/*
397170530Ssam * Checks if the given file is sorted.  Stops at the first disorder,
398170530Ssam * prints the disordered line and returns 1.
399170530Ssam */
400170530Ssamint
401170530Ssamcheck(const char *fn)
402170530Ssam{
403170530Ssam	struct bwstring *s1, *s2, *s1disorder, *s2disorder;
404170530Ssam	struct file_reader *fr;
405170530Ssam	struct keys_array *ka1, *ka2;
406170530Ssam	int res;
407170530Ssam	size_t pos, posdisorder;
408170530Ssam
409170530Ssam	s1 = s2 = s1disorder = s2disorder = NULL;
410170530Ssam	ka1 = ka2 = NULL;
411170530Ssam
412178354Ssam	fr = file_reader_init(fn);
413170530Ssam
414173273Ssam	res = 0;
415173273Ssam	pos = 1;
416173273Ssam	posdisorder = 1;
417173273Ssam
418173273Ssam	if (fr == NULL) {
419178354Ssam		err(2, NULL);
420170530Ssam		goto end;
421170530Ssam	}
422173273Ssam
423170530Ssam	s1 = file_reader_readline(fr);
424173273Ssam	if (s1 == NULL)
425170530Ssam		goto end;
426170530Ssam
427173273Ssam	ka1 = keys_array_alloc();
428170530Ssam	preproc(s1, ka1);
429178354Ssam
430170530Ssam	s2 = file_reader_readline(fr);
431170530Ssam	if (s2 == NULL)
432170530Ssam		goto end;
433173273Ssam
434170530Ssam	ka2 = keys_array_alloc();
435170530Ssam	preproc(s2, ka2);
436170530Ssam
437170530Ssam	for (;;) {
438170530Ssam
439173273Ssam		if (debug_sort) {
440178354Ssam			bwsprintf(stdout, s2, "s1=<", ">");
441173273Ssam			bwsprintf(stdout, s1, "s2=<", ">");
442170530Ssam		}
443173273Ssam		int cmp = key_coll(ka2, ka1, 0);
444170530Ssam		if (debug_sort)
445170530Ssam			printf("; cmp1=%d", cmp);
446170530Ssam
447173273Ssam		if (!cmp && sort_opts_vals.complex_sort &&
448170530Ssam		    !(sort_opts_vals.uflag) && !(sort_opts_vals.sflag)) {
449170530Ssam			cmp = top_level_str_coll(s2, s1);
450173273Ssam			if (debug_sort)
451173273Ssam				printf("; cmp2=%d", cmp);
452173273Ssam		}
453173273Ssam		if (debug_sort)
454173273Ssam			printf("\n");
455173273Ssam
456173273Ssam		if ((sort_opts_vals.uflag && (cmp <= 0)) || (cmp < 0)) {
457173273Ssam			if (!(sort_opts_vals.csilentflag)) {
458178354Ssam				s2disorder = bwsdup(s2);
459173273Ssam				posdisorder = pos;
460173273Ssam				if (debug_sort)
461173273Ssam					s1disorder = bwsdup(s1);
462173273Ssam			}
463173273Ssam			res = 1;
464173273Ssam			goto end;
465173273Ssam		}
466173273Ssam
467173273Ssam		pos++;
468173273Ssam
469173273Ssam		clean_keys_array(s1, ka1);
470173273Ssam		sort_free(ka1);
471173273Ssam		ka1 = ka2;
472173273Ssam		ka2 = NULL;
473173273Ssam
474173273Ssam		bwsfree(s1);
475173273Ssam		s1 = s2;
476173273Ssam
477178354Ssam		s2 = file_reader_readline(fr);
478173273Ssam		if (s2 == NULL)
479173273Ssam			goto end;
480173273Ssam
481173273Ssam		ka2 = keys_array_alloc();
482173273Ssam		preproc(s2, ka2);
483173273Ssam	}
484173273Ssam
485173273Ssamend:
486173273Ssam	if (ka1) {
487173273Ssam		clean_keys_array(s1, ka1);
488173273Ssam		sort_free(ka1);
489173273Ssam	}
490173273Ssam
491173273Ssam	if (s1)
492178354Ssam		bwsfree(s1);
493178354Ssam
494178354Ssam	if (ka2) {
495178354Ssam		clean_keys_array(s2, ka2);
496173273Ssam		sort_free(ka2);
497173273Ssam	}
498173273Ssam
499173273Ssam	if (s2)
500173273Ssam		bwsfree(s2);
501173273Ssam
502173273Ssam	if ((fn == NULL) || (*fn == 0) || (strcmp(fn, "-") == 0)) {
503173273Ssam		for (;;) {
504173273Ssam			s2 = file_reader_readline(fr);
505173273Ssam			if (s2 == NULL)
506173273Ssam				break;
507173273Ssam			bwsfree(s2);
508173273Ssam		}
509178354Ssam	}
510173273Ssam
511173273Ssam	file_reader_free(fr);
512173273Ssam
513173273Ssam	if (s2disorder) {
514173273Ssam		bws_disorder_warnx(s2disorder, fn, posdisorder);
515173273Ssam		if (s1disorder) {
516173273Ssam			bws_disorder_warnx(s1disorder, fn, posdisorder);
517173273Ssam			if (s1disorder != s2disorder)
518173273Ssam				bwsfree(s1disorder);
519173273Ssam		}
520173273Ssam		bwsfree(s2disorder);
521170530Ssam		s1disorder = NULL;
522170530Ssam		s2disorder = NULL;
523170530Ssam	}
524173273Ssam
525170530Ssam	if (res)
526170530Ssam		exit(res);
527170530Ssam
528170530Ssam	return (0);
529170530Ssam}
530170530Ssam
531170530Ssam/*
532170530Ssam * Opens a file.  If the given filename is "-", stdout will be
533173273Ssam * opened.
534173273Ssam */
535178354SsamFILE *
536170530Ssamopenfile(const char *fn, const char *mode)
537170530Ssam{
538170530Ssam	FILE *file;
539170530Ssam
540170530Ssam	if (strcmp(fn, "-") == 0) {
541170530Ssam		return ((mode && mode[0] == 'r') ? stdin : stdout);
542183247Ssam	} else {
543183247Ssam		mode_t orig_file_mask = 0;
544170530Ssam		int is_tmp = file_is_tmp(fn);
545170530Ssam
546170530Ssam		if (is_tmp && (mode[0] == 'w'))
547170530Ssam			orig_file_mask = umask(S_IWGRP | S_IWOTH |
548183247Ssam			    S_IRGRP | S_IROTH);
549183247Ssam
550183247Ssam		if (is_tmp && (compress_program != NULL)) {
551183247Ssam			char *cmd;
552183247Ssam			size_t cmdsz;
553183247Ssam
554183247Ssam			cmdsz = strlen(fn) + 128;
555173273Ssam			cmd = sort_malloc(cmdsz);
556173273Ssam
557173273Ssam			fflush(stdout);
558173273Ssam
559173273Ssam			if (mode[0] == 'r')
560170530Ssam				snprintf(cmd, cmdsz - 1, "cat %s | %s -d",
561170530Ssam				    fn, compress_program);
562170530Ssam			else if (mode[0] == 'w')
563170530Ssam				snprintf(cmd, cmdsz - 1, "%s > %s",
564170530Ssam				    compress_program, fn);
565173273Ssam			else
566170530Ssam				err(2, "%s", getstr(7));
567182827Ssam
568182827Ssam			if ((file = popen(cmd, mode)) == NULL)
569182827Ssam				err(2, NULL);
570182827Ssam
571182827Ssam			sort_free(cmd);
572182827Ssam
573182827Ssam		} else
574182827Ssam			if ((file = fopen(fn, mode)) == NULL)
575182827Ssam				err(2, NULL);
576182827Ssam
577182827Ssam		if (is_tmp && (mode[0] == 'w'))
578182827Ssam			umask(orig_file_mask);
579182827Ssam	}
580182827Ssam
581182827Ssam	return (file);
582173273Ssam}
583173273Ssam
584170530Ssam/*
585170530Ssam * Close file
586170530Ssam */
587170530Ssamvoid
588170530Ssamclosefile(FILE *f, const char *fn)
589170530Ssam{
590170530Ssam	if (f == NULL) {
591170530Ssam		;
592170530Ssam	} else if (f == stdin) {
593170530Ssam		;
594170530Ssam	} else if (f == stdout) {
595173273Ssam		fflush(f);
596170530Ssam	} else {
597170530Ssam		if (file_is_tmp(fn) && compress_program != NULL) {
598170530Ssam			if(pclose(f)<0)
599170530Ssam				err(2,NULL);
600170530Ssam		} else
601170530Ssam			fclose(f);
602173273Ssam	}
603170530Ssam}
604170530Ssam
605170530Ssam/*
606173273Ssam * Reads a file into the internal buffer.
607170530Ssam */
608170530Ssamstruct file_reader *
609170530Ssamfile_reader_init(const char *fsrc)
610173273Ssam{
611170530Ssam	struct file_reader *ret;
612173273Ssam
613173273Ssam	if (fsrc == NULL)
614173273Ssam		fsrc = "-";
615173273Ssam
616173273Ssam	ret = sort_malloc(sizeof(struct file_reader));
617173273Ssam	memset(ret, 0, sizeof(struct file_reader));
618173273Ssam
619173273Ssam	ret->elsymb = '\n';
620173273Ssam	if (sort_opts_vals.zflag)
621173273Ssam		ret->elsymb = 0;
622173273Ssam
623173273Ssam	ret->fname = sort_strdup(fsrc);
624173273Ssam
625173273Ssam	if (strcmp(fsrc, "-") && (compress_program == NULL) && use_mmap) {
626170530Ssam
627173273Ssam		do {
628173273Ssam			struct stat stat_buf;
629173273Ssam			void *addr;
630173273Ssam			size_t sz = 0;
631173273Ssam			int fd, flags;
632170530Ssam
633173273Ssam			flags = MAP_NOCORE | MAP_NOSYNC;
634173273Ssam			addr = MAP_FAILED;
635173273Ssam
636173273Ssam			fd = open(fsrc, O_RDONLY);
637173273Ssam			if (fd < 0)
638173273Ssam				err(2, NULL);
639173273Ssam
640173273Ssam			if (fstat(fd, &stat_buf) < 0) {
641178354Ssam				close(fd);
642173273Ssam				break;
643173273Ssam			}
644173273Ssam
645173273Ssam			sz = stat_buf.st_size;
646173273Ssam
647173273Ssam#if defined(MAP_PREFAULT_READ)
648173273Ssam			flags |= MAP_PREFAULT_READ;
649173273Ssam#endif
650173273Ssam
651173273Ssam			addr = mmap(NULL, sz, PROT_READ, flags, fd, 0);
652173273Ssam			if (addr == MAP_FAILED) {
653173273Ssam				close(fd);
654173273Ssam				break;
655173273Ssam			}
656173273Ssam
657173273Ssam			ret->fd = fd;
658173273Ssam			ret->mmapaddr = addr;
659173273Ssam			ret->mmapsize = sz;
660178354Ssam			ret->mmapptr = ret->mmapaddr;
661173273Ssam
662178354Ssam		} while (0);
663173273Ssam	}
664173273Ssam
665173273Ssam	if (ret->mmapaddr == NULL) {
666173273Ssam		ret->file = openfile(fsrc, "r");
667173273Ssam		if (ret->file == NULL)
668178354Ssam			err(2, NULL);
669173273Ssam
670173273Ssam		if (strcmp(fsrc, "-")) {
671173273Ssam			ret->cbsz = READ_CHUNK;
672173273Ssam			ret->buffer = sort_malloc(ret->cbsz);
673173273Ssam			ret->bsz = 0;
674173273Ssam			ret->strbeg = 0;
675173273Ssam
676173273Ssam			ret->bsz = fread(ret->buffer, 1, ret->cbsz, ret->file);
677173273Ssam			if (ret->bsz == 0) {
678173273Ssam				if (ferror(ret->file))
679173273Ssam					err(2, NULL);
680178354Ssam			}
681173273Ssam		}
682170530Ssam	}
683173273Ssam
684170530Ssam	return (ret);
685178354Ssam}
686170530Ssam
687173273Ssamstruct bwstring *
688173273Ssamfile_reader_readline(struct file_reader *fr)
689173273Ssam{
690173273Ssam	struct bwstring *ret = NULL;
691173273Ssam
692173273Ssam	if (fr->mmapaddr) {
693173273Ssam		unsigned char *mmapend;
694173273Ssam
695173273Ssam		mmapend = fr->mmapaddr + fr->mmapsize;
696173273Ssam		if (fr->mmapptr >= mmapend)
697173273Ssam			return (NULL);
698173273Ssam		else {
699170530Ssam			unsigned char *strend;
700170530Ssam			size_t sz;
701173273Ssam
702173273Ssam			sz = mmapend - fr->mmapptr;
703170530Ssam			strend = memchr(fr->mmapptr, fr->elsymb, sz);
704178354Ssam
705173273Ssam			if (strend == NULL) {
706178354Ssam				ret = bwscsbdup(fr->mmapptr, sz);
707173273Ssam				fr->mmapptr = mmapend;
708173273Ssam			} else {
709173273Ssam				ret = bwscsbdup(fr->mmapptr, strend -
710173273Ssam				    fr->mmapptr);
711178354Ssam				fr->mmapptr = strend + 1;
712173273Ssam			}
713170530Ssam		}
714173273Ssam
715170530Ssam	} else if (fr->file != stdin) {
716173273Ssam		unsigned char *strend;
717173273Ssam		size_t bsz1, remsz, search_start;
718170530Ssam
719170530Ssam		search_start = 0;
720170530Ssam		remsz = 0;
721170530Ssam		strend = NULL;
722170530Ssam
723170530Ssam		if (fr->bsz > fr->strbeg)
724173273Ssam			remsz = fr->bsz - fr->strbeg;
725170530Ssam
726170530Ssam		/* line read cycle */
727170530Ssam		for (;;) {
728170530Ssam			if (remsz > search_start)
729178354Ssam				strend = memchr(fr->buffer + fr->strbeg +
730170530Ssam				    search_start, fr->elsymb, remsz -
731170530Ssam				    search_start);
732170530Ssam			else
733170530Ssam				strend = NULL;
734170530Ssam
735173273Ssam			if (strend)
736173273Ssam				break;
737178354Ssam			if (feof(fr->file))
738173273Ssam				break;
739173273Ssam
740178354Ssam			if (fr->bsz != fr->cbsz)
741173273Ssam				/* NOTREACHED */
742173273Ssam				err(2, "File read software error 1");
743170530Ssam
744170530Ssam			if (remsz > (READ_CHUNK >> 1)) {
745170530Ssam				search_start = fr->cbsz - fr->strbeg;
746170530Ssam				fr->cbsz += READ_CHUNK;
747170530Ssam				fr->buffer = sort_realloc(fr->buffer,
748170530Ssam				    fr->cbsz);
749170530Ssam				bsz1 = fread(fr->buffer + fr->bsz, 1,
750170530Ssam				    READ_CHUNK, fr->file);
751178354Ssam				if (bsz1 == 0) {
752170530Ssam					if (ferror(fr->file))
753170530Ssam						err(2, NULL);
754178354Ssam					break;
755170530Ssam				}
756170530Ssam				fr->bsz += bsz1;
757178354Ssam				remsz += bsz1;
758170530Ssam			} else {
759173273Ssam				if (remsz > 0 && fr->strbeg>0)
760173273Ssam					bcopy(fr->buffer + fr->strbeg,
761170530Ssam					    fr->buffer, remsz);
762170530Ssam
763173273Ssam				fr->strbeg = 0;
764170530Ssam				search_start = remsz;
765173273Ssam				bsz1 = fread(fr->buffer + remsz, 1,
766173273Ssam				    fr->cbsz - remsz, fr->file);
767170530Ssam				if (bsz1 == 0) {
768178354Ssam					if (ferror(fr->file))
769173273Ssam						err(2, NULL);
770170530Ssam					break;
771173273Ssam				}
772173273Ssam				fr->bsz = remsz + bsz1;
773178354Ssam				remsz = fr->bsz;
774173273Ssam			}
775173273Ssam		}
776173273Ssam
777173273Ssam		if (strend == NULL)
778173273Ssam			strend = fr->buffer + fr->bsz;
779173273Ssam
780173273Ssam		if ((fr->buffer + fr->strbeg <= strend) &&
781173273Ssam		    (fr->strbeg < fr->bsz) && (remsz>0))
782173273Ssam			ret = bwscsbdup(fr->buffer + fr->strbeg, strend -
783170530Ssam			    fr->buffer - fr->strbeg);
784173273Ssam
785170530Ssam		fr->strbeg = (strend - fr->buffer) + 1;
786173273Ssam
787173273Ssam	} else {
788170530Ssam		size_t len = 0;
789178354Ssam
790173273Ssam		ret = bwsfgetln(fr->file, &len, sort_opts_vals.zflag,
791173273Ssam		    &(fr->rb));
792173273Ssam	}
793173273Ssam
794173273Ssam	return (ret);
795173273Ssam}
796178354Ssam
797173273Ssamstatic void
798170530Ssamfile_reader_clean(struct file_reader *fr)
799170530Ssam{
800170530Ssam
801170530Ssam	if (fr) {
802170530Ssam		if (fr->mmapaddr)
803170530Ssam			munmap(fr->mmapaddr, fr->mmapsize);
804170530Ssam
805170530Ssam		if (fr->fd)
806170530Ssam			close(fr->fd);
807183254Ssam
808170530Ssam		if (fr->buffer)
809170530Ssam			sort_free(fr->buffer);
810170530Ssam
811170530Ssam		if (fr->file)
812173273Ssam			if (fr->file != stdin)
813173273Ssam				closefile(fr->file, fr->fname);
814173273Ssam
815173273Ssam		if(fr->fname)
816173273Ssam			sort_free(fr->fname);
817173273Ssam
818173273Ssam		memset(fr, 0, sizeof(struct file_reader));
819173273Ssam	}
820170530Ssam}
821170530Ssam
822170530Ssamvoid
823184280Ssamfile_reader_free(struct file_reader *fr)
824173273Ssam{
825170530Ssam
826173273Ssam	if (fr) {
827170530Ssam		file_reader_clean(fr);
828170530Ssam		sort_free(fr);
829170530Ssam	}
830170530Ssam}
831170530Ssam
832170530Ssamint
833170530Ssamprocfile(const char *fsrc, struct sort_list *list, struct file_list *fl)
834170530Ssam{
835170530Ssam	struct file_reader *fr;
836191552Ssam
837170530Ssam	fr = file_reader_init(fsrc);
838170530Ssam	if (fr == NULL)
839170530Ssam		err(2, NULL);
840170530Ssam
841170530Ssam	/* file browse cycle */
842170530Ssam	for (;;) {
843170530Ssam		struct bwstring *bws;
844184280Ssam
845184280Ssam		bws = file_reader_readline(fr);
846170530Ssam
847170530Ssam		if (bws == NULL)
848191552Ssam			break;
849170530Ssam
850170530Ssam		sort_list_add(list, bws);
851182828Ssam
852170530Ssam		if (list->memsize >= available_free_memory) {
853170530Ssam			char *fn;
854178354Ssam
855178354Ssam			fn = new_tmp_file_name();
856178354Ssam			sort_list_to_file(list, fn);
857178354Ssam			file_list_add(fl, fn, false);
858178354Ssam			sort_list_clean(list);
859178354Ssam		}
860178354Ssam	}
861178354Ssam
862178354Ssam	file_reader_free(fr);
863178354Ssam
864178354Ssam	return (0);
865178354Ssam}
866178354Ssam
867178354Ssam/*
868178354Ssam * Compare file headers. Files with EOF always go to the end of the list.
869178354Ssam */
870178354Ssamstatic int
871178354Ssamfile_header_cmp(struct file_header *f1, struct file_header *f2)
872178354Ssam{
873178354Ssam
874178354Ssam	if (f1 == f2)
875178354Ssam		return (0);
876178354Ssam	else {
877178354Ssam		if (f1->fr == NULL) {
878178354Ssam			return ((f2->fr == NULL) ? 0 : +1);
879178354Ssam		} else if (f2->fr == NULL)
880178354Ssam			return (-1);
881178354Ssam		else {
882178354Ssam			int ret;
883178354Ssam
884178354Ssam			ret = list_coll(&(f1->si), &(f2->si));
885178354Ssam			if (!ret)
886178354Ssam				return ((f1->file_pos < f2->file_pos) ? -1 : +1);
887178354Ssam			return (ret);
888178354Ssam		}
889178354Ssam	}
890178354Ssam}
891178354Ssam
892178354Ssam/*
893173273Ssam * Allocate and init file header structure
894173273Ssam */
895173273Ssamstatic void
896173273Ssamfile_header_init(struct file_header **fh, const char *fn, size_t file_pos)
897173273Ssam{
898173273Ssam
899173273Ssam	if (fh && fn) {
900173273Ssam		struct bwstring *line;
901173273Ssam
902173273Ssam		*fh = sort_malloc(sizeof(struct file_header));
903173273Ssam		(*fh)->file_pos = file_pos;
904173273Ssam		(*fh)->fr = file_reader_init(fn);
905173273Ssam		if ((*fh)->fr == NULL) {
906173273Ssam			perror(fn);
907173273Ssam			err(2, "%s", getstr(8));
908173273Ssam		}
909173273Ssam		line = file_reader_readline((*fh)->fr);
910173273Ssam		if (line == NULL) {
911173273Ssam			file_reader_free((*fh)->fr);
912173273Ssam			(*fh)->fr = NULL;
913173273Ssam			(*fh)->si = NULL;
914173273Ssam		} else {
915173273Ssam			(*fh)->si = sort_list_item_alloc();
916173273Ssam			sort_list_item_set((*fh)->si, line);
917173273Ssam		}
918173273Ssam	}
919173273Ssam}
920173273Ssam
921173273Ssam/*
922173273Ssam * Close file
923173273Ssam */
924173273Ssamstatic void
925173273Ssamfile_header_close(struct file_header **fh)
926173273Ssam{
927173273Ssam
928173273Ssam	if (fh && *fh) {
929173273Ssam		if ((*fh)->fr) {
930173273Ssam			file_reader_free((*fh)->fr);
931173273Ssam			(*fh)->fr = NULL;
932173273Ssam		}
933173273Ssam		if ((*fh)->si) {
934173273Ssam			sort_list_item_clean((*fh)->si);
935173273Ssam			sort_free((*fh)->si);
936173273Ssam			(*fh)->si = NULL;
937173273Ssam		}
938173273Ssam		sort_free(*fh);
939173273Ssam		*fh = NULL;
940173273Ssam	}
941173273Ssam}
942173273Ssam
943173273Ssam/*
944173273Ssam * Swap two array elements
945178354Ssam */
946173273Ssamstatic void
947173273Ssamfile_header_swap(struct file_header **fh, size_t i1, size_t i2)
948173273Ssam{
949178354Ssam	struct file_header *tmp;
950173273Ssam
951173273Ssam	tmp = fh[i1];
952173273Ssam	fh[i1] = fh[i2];
953173273Ssam	fh[i2] = tmp;
954173273Ssam}
955173273Ssam
956173273Ssam/* heap algorithm ==>> */
957173273Ssam
958178354Ssam/*
959178354Ssam * See heap sort algorithm
960173273Ssam * "Raises" last element to its right place
961173273Ssam */
962178354Ssamstatic void
963173273Ssamfile_header_heap_swim(struct file_header **fh, size_t indx)
964173273Ssam{
965173273Ssam
966173273Ssam	if (indx > 0) {
967173273Ssam		size_t parent_index;
968173273Ssam
969173273Ssam		parent_index = (indx - 1) >> 1;
970173273Ssam
971178354Ssam		if (file_header_cmp(fh[indx], fh[parent_index]) < 0) {
972173273Ssam			/* swap child and parent and continue */
973173273Ssam			file_header_swap(fh, indx, parent_index);
974173273Ssam			file_header_heap_swim(fh, parent_index);
975173273Ssam		}
976173273Ssam	}
977173273Ssam}
978183256Ssam
979183256Ssam/*
980183256Ssam * Sink the top element to its correct position
981173273Ssam */
982173273Ssamstatic void
983173273Ssamfile_header_heap_sink(struct file_header **fh, size_t indx, size_t size)
984173273Ssam{
985173273Ssam	size_t left_child_index;
986173273Ssam	size_t right_child_index;
987173273Ssam
988173273Ssam	left_child_index = indx + indx + 1;
989173273Ssam	right_child_index = left_child_index + 1;
990173273Ssam
991173273Ssam	if (left_child_index < size) {
992173273Ssam		size_t min_child_index;
993173273Ssam
994173273Ssam		min_child_index = left_child_index;
995173273Ssam
996173273Ssam		if ((right_child_index < size) &&
997173273Ssam		    (file_header_cmp(fh[left_child_index],
998173273Ssam		    fh[right_child_index]) > 0))
999178354Ssam			min_child_index = right_child_index;
1000178354Ssam		if (file_header_cmp(fh[indx], fh[min_child_index]) > 0) {
1001178354Ssam			file_header_swap(fh, indx, min_child_index);
1002178354Ssam			file_header_heap_sink(fh, min_child_index, size);
1003178354Ssam		}
1004178354Ssam	}
1005178354Ssam}
1006178354Ssam
1007183253Ssam/* <<== heap algorithm */
1008183253Ssam
1009183253Ssam/*
1010178354Ssam * Adds element to the "left" end
1011178354Ssam */
1012178354Ssamstatic void
1013178354Ssamfile_header_list_rearrange_from_header(struct file_header **fh, size_t size)
1014178354Ssam{
1015178354Ssam
1016178354Ssam	file_header_heap_sink(fh, 0, size);
1017178354Ssam}
1018178354Ssam
1019178354Ssam/*
1020178354Ssam * Adds element to the "right" end
1021178354Ssam */
1022178354Ssamstatic void
1023178354Ssamfile_header_list_push(struct file_header *f, struct file_header **fh, size_t size)
1024178354Ssam{
1025178354Ssam
1026173273Ssam	fh[size++] = f;
1027173273Ssam	file_header_heap_swim(fh, size - 1);
1028173273Ssam}
1029173273Ssam
1030173273Ssamstruct last_printed
1031173273Ssam{
1032173273Ssam	struct bwstring *str;
1033173273Ssam};
1034173273Ssam
1035173273Ssam/*
1036173273Ssam * Prints the current line of the file
1037178354Ssam */
1038178354Ssamstatic void
1039178354Ssamfile_header_print(struct file_header *fh, FILE *f_out, struct last_printed *lp)
1040178354Ssam{
1041173273Ssam
1042178354Ssam	if (fh && fh->fr && f_out && fh->si && fh->si->str) {
1043178354Ssam		if (sort_opts_vals.uflag) {
1044178354Ssam			if ((lp->str == NULL) || (str_list_coll(lp->str, &(fh->si)))) {
1045173273Ssam				bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag);
1046173273Ssam				if (lp->str)
1047173273Ssam					bwsfree(lp->str);
1048173273Ssam				lp->str = bwsdup(fh->si->str);
1049173273Ssam			}
1050173273Ssam		} else
1051173273Ssam			bwsfwrite(fh->si->str, f_out, sort_opts_vals.zflag);
1052173273Ssam	}
1053173273Ssam}
1054173273Ssam
1055173273Ssam/*
1056173273Ssam * Read next line
1057173273Ssam */
1058173273Ssamstatic void
1059173273Ssamfile_header_read_next(struct file_header *fh)
1060173273Ssam{
1061173273Ssam
1062173273Ssam	if (fh && fh->fr) {
1063173273Ssam		struct bwstring *tmp;
1064173273Ssam
1065173273Ssam		tmp = file_reader_readline(fh->fr);
1066173273Ssam		if (tmp == NULL) {
1067173273Ssam			file_reader_free(fh->fr);
1068173273Ssam			fh->fr = NULL;
1069173273Ssam			if (fh->si) {
1070173273Ssam				sort_list_item_clean(fh->si);
1071173273Ssam				sort_free(fh->si);
1072173273Ssam				fh->si = NULL;
1073173273Ssam			}
1074173273Ssam		} else {
1075173273Ssam			if (fh->si == NULL)
1076173273Ssam				fh->si = sort_list_item_alloc();
1077173273Ssam			sort_list_item_set(fh->si, tmp);
1078173273Ssam		}
1079173273Ssam	}
1080173273Ssam}
1081173273Ssam
1082173273Ssam/*
1083173273Ssam * Merge array of "files headers"
1084173273Ssam */
1085173273Ssamstatic void
1086173273Ssamfile_headers_merge(size_t fnum, struct file_header **fh, FILE *f_out)
1087173273Ssam{
1088173273Ssam	struct last_printed lp;
1089173273Ssam	size_t i;
1090173273Ssam
1091173273Ssam	memset(&lp, 0, sizeof(lp));
1092173273Ssam
1093173273Ssam	/*
1094178354Ssam	 * construct the initial sort structure
1095178354Ssam	 */
1096178354Ssam	for (i = 0; i < fnum; i++)
1097178354Ssam		file_header_list_push(fh[i], fh, i);
1098178354Ssam
1099178354Ssam	while (fh[0]->fr) { /* unfinished files are always in front */
1100178354Ssam		/* output the smallest line: */
1101178354Ssam		file_header_print(fh[0], f_out, &lp);
1102178354Ssam		/* read a new line, if possible: */
1103173273Ssam		file_header_read_next(fh[0]);
1104173273Ssam		/* re-arrange the list: */
1105178354Ssam		file_header_list_rearrange_from_header(fh, fnum);
1106173273Ssam	}
1107178354Ssam
1108183246Ssam	if (lp.str)
1109178354Ssam		bwsfree(lp.str);
1110178354Ssam}
1111178354Ssam
1112183246Ssam/*
1113178354Ssam * Merges the given files into the output file, which can be
1114178354Ssam * stdout.
1115178354Ssam */
1116183246Ssamstatic void
1117183246Ssammerge_files_array(size_t argc, char **argv, const char *fn_out)
1118183246Ssam{
1119183246Ssam
1120183246Ssam	if (argv && fn_out) {
1121183246Ssam		struct file_header **fh;
1122183246Ssam		FILE *f_out;
1123181197Ssam		size_t i;
1124178354Ssam
1125173273Ssam		f_out = openfile(fn_out, "w");
1126173273Ssam
1127173273Ssam		if (f_out == NULL)
1128173273Ssam			err(2, NULL);
1129173273Ssam
1130173273Ssam		fh = sort_malloc((argc + 1) * sizeof(struct file_header *));
1131173273Ssam
1132173273Ssam		for (i = 0; i < argc; i++)
1133173273Ssam			file_header_init(fh + i, argv[i], (size_t) i);
1134173273Ssam
1135173273Ssam		file_headers_merge(argc, fh, f_out);
1136173273Ssam
1137173273Ssam		for (i = 0; i < argc; i++)
1138173273Ssam			file_header_close(fh + i);
1139173273Ssam
1140173273Ssam		sort_free(fh);
1141173273Ssam
1142173273Ssam		closefile(f_out, fn_out);
1143178354Ssam	}
1144173273Ssam}
1145173273Ssam
1146173273Ssam/*
1147173273Ssam * Shrinks the file list until its size smaller than max number of opened files
1148173273Ssam */
1149173273Ssamstatic int
1150173273Ssamshrink_file_list(struct file_list *fl)
1151170530Ssam{
1152170530Ssam
1153170530Ssam	if ((fl == NULL) || (size_t) (fl->count) < max_open_files)
1154170530Ssam		return (0);
1155170530Ssam	else {
1156170530Ssam		struct file_list new_fl;
1157170530Ssam		size_t indx = 0;
1158170530Ssam
1159170530Ssam		file_list_init(&new_fl, true);
1160170530Ssam		while (indx < fl->count) {
1161170530Ssam			char *fnew;
1162170530Ssam			size_t num;
1163170530Ssam
1164170530Ssam			num = fl->count - indx;
1165170530Ssam			fnew = new_tmp_file_name();
1166170530Ssam
1167170530Ssam			if ((size_t) num >= max_open_files)
1168170530Ssam				num = max_open_files - 1;
1169170530Ssam			merge_files_array(num, fl->fns + indx, fnew);
1170170530Ssam			if (fl->tmp) {
1171170530Ssam				size_t i;
1172170530Ssam
1173170530Ssam				for (i = 0; i < num; i++)
1174170530Ssam					unlink(fl->fns[indx + i]);
1175170530Ssam			}
1176170530Ssam			file_list_add(&new_fl, fnew, false);
1177170530Ssam			indx += num;
1178170530Ssam		}
1179183254Ssam		fl->tmp = false; /* already taken care of */
1180183254Ssam		file_list_clean(fl);
1181183254Ssam
1182170530Ssam		fl->count = new_fl.count;
1183170530Ssam		fl->fns = new_fl.fns;
1184170530Ssam		fl->sz = new_fl.sz;
1185170530Ssam		fl->tmp = new_fl.tmp;
1186170530Ssam
1187172055Ssam		return (1);
1188170530Ssam	}
1189170530Ssam}
1190170530Ssam
1191183254Ssam/*
1192183254Ssam * Merge list of files
1193183254Ssam */
1194183254Ssamvoid
1195183254Ssammerge_files(struct file_list *fl, const char *fn_out)
1196183254Ssam{
1197183254Ssam
1198183254Ssam	if (fl && fn_out) {
1199183254Ssam		while (shrink_file_list(fl));
1200183254Ssam
1201183254Ssam		merge_files_array(fl->count, fl->fns, fn_out);
1202183254Ssam	}
1203183254Ssam}
1204183254Ssam
1205183254Ssamstatic const char *
1206183254Ssamget_sort_method_name(int sm)
1207183254Ssam{
1208183254Ssam
1209183254Ssam	if (sm == SORT_MERGESORT)
1210183254Ssam		return "mergesort";
1211183254Ssam	else if (sort_opts_vals.sort_method == SORT_RADIXSORT)
1212183254Ssam		return "radixsort";
1213183254Ssam	else if (sort_opts_vals.sort_method == SORT_HEAPSORT)
1214183254Ssam		return "heapsort";
1215183254Ssam	else
1216183254Ssam		return "quicksort";
1217183254Ssam}
1218183254Ssam
1219183254Ssam/*
1220183254Ssam * Wrapper for qsort
1221183254Ssam */
1222173273Ssamstatic int sort_qsort(void *list, size_t count, size_t elem_size,
1223173273Ssam    int (*cmp_func)(const void *, const void *))
1224183254Ssam{
1225173273Ssam
1226178354Ssam	qsort(list, count, elem_size, cmp_func);
1227173273Ssam	return (0);
1228173273Ssam}
1229173273Ssam
1230173273Ssam/*
1231173273Ssam * Sort list of lines and writes it to the file
1232183254Ssam */
1233183254Ssamvoid
1234173273Ssamsort_list_to_file(struct sort_list *list, const char *outfile)
1235173273Ssam{
1236173273Ssam	struct sort_mods *sm = &(keys[0].sm);
1237183254Ssam
1238173273Ssam	if (!(sm->Mflag) && !(sm->Rflag) && !(sm->Vflag) && !(sm->Vflag) &&
1239173273Ssam	    !(sm->gflag) && !(sm->hflag) && !(sm->nflag)) {
1240173273Ssam		if ((sort_opts_vals.sort_method == SORT_DEFAULT) && byte_sort)
1241183254Ssam			sort_opts_vals.sort_method = SORT_RADIXSORT;
1242173273Ssam
1243173273Ssam	} else if (sort_opts_vals.sort_method == SORT_RADIXSORT)
1244173273Ssam		err(2, "%s", getstr(9));
1245173273Ssam
1246173273Ssam	/*
1247173273Ssam	 * to handle stable sort and the unique cases in the
1248173273Ssam	 * right order, we need stable basic algorithm
1249173273Ssam	 */
1250173273Ssam	if (sort_opts_vals.sflag) {
1251173273Ssam		switch (sort_opts_vals.sort_method){
1252170530Ssam		case SORT_MERGESORT:
1253170530Ssam			break;
1254170530Ssam		case SORT_RADIXSORT:
1255183255Ssam			break;
1256183255Ssam		case SORT_DEFAULT:
1257183255Ssam			sort_opts_vals.sort_method = SORT_MERGESORT;
1258183255Ssam			break;
1259183255Ssam		default:
1260183255Ssam			errx(2, "%s", getstr(10));
1261183255Ssam		};
1262183255Ssam	}
1263183255Ssam
1264183255Ssam	if (sort_opts_vals.sort_method == SORT_DEFAULT)
1265183255Ssam		sort_opts_vals.sort_method = DEFAULT_SORT_ALGORITHM;
1266183255Ssam
1267183255Ssam	if (debug_sort)
1268183255Ssam		printf("sort_method=%s\n",
1269183255Ssam		    get_sort_method_name(sort_opts_vals.sort_method));
1270183255Ssam
1271183255Ssam	switch (sort_opts_vals.sort_method){
1272183255Ssam	case SORT_RADIXSORT:
1273183255Ssam		rxsort(list->list, list->count);
1274183255Ssam		sort_list_dump(list, outfile);
1275183255Ssam		break;
1276183255Ssam	case SORT_MERGESORT:
1277183255Ssam		mt_sort(list, mergesort, outfile);
1278183255Ssam		break;
1279183255Ssam	case SORT_HEAPSORT:
1280183255Ssam		mt_sort(list, heapsort,	outfile);
1281183257Ssam		break;
1282183257Ssam	case SORT_QSORT:
1283183257Ssam		mt_sort(list, sort_qsort, outfile);
1284183257Ssam		break;
1285183257Ssam	default:
1286183257Ssam		mt_sort(list, DEFAULT_SORT_FUNC, outfile);
1287183257Ssam		break;
1288183257Ssam	}
1289183257Ssam}
1290183257Ssam
1291183257Ssam/******************* MT SORT ************************/
1292183257Ssam
1293183257Ssam#if defined(SORT_THREADS)
1294183257Ssam/* semaphore to count threads */
1295183257Ssamstatic sem_t mtsem;
1296183257Ssam
1297183257Ssam/* current system sort function */
1298183257Ssamstatic int (*g_sort_func)(void *, size_t, size_t,
1299183254Ssam    int(*)(const void *, const void *));
1300183254Ssam
1301183254Ssam/*
1302183254Ssam * Sort cycle thread (in multi-threaded mode)
1303183254Ssam */
1304183254Ssamstatic void*
1305183254Ssammt_sort_thread(void* arg)
1306183254Ssam{
1307183254Ssam	struct sort_list *list = arg;
1308183254Ssam
1309183254Ssam	g_sort_func(list->list, list->count, sizeof(struct sort_list_item *),
1310183254Ssam	    (int(*)(const void *, const void *)) list_coll);
1311183255Ssam
1312183255Ssam	sem_post(&mtsem);
1313183257Ssam
1314183254Ssam	return (arg);
1315183254Ssam}
1316183254Ssam
1317183254Ssam/*
1318183254Ssam * Compare sub-lists. Empty sub-lists always go to the end of the list.
1319183254Ssam */
1320183254Ssamstatic int
1321183254Ssamsub_list_cmp(struct sort_list *l1, struct sort_list *l2)
1322183254Ssam{
1323183254Ssam
1324183254Ssam	if (l1 == l2)
1325183254Ssam		return (0);
1326183254Ssam	else {
1327183254Ssam		if (l1->count == 0) {
1328183254Ssam			return ((l2->count == 0) ? 0 : +1);
1329183254Ssam		} else if (l2->count == 0) {
1330183254Ssam			return (-1);
1331183256Ssam		} else {
1332183256Ssam			int ret;
1333183256Ssam
1334183256Ssam			ret = list_coll(&(l1->list[0]), &(l2->list[0]));
1335183256Ssam			if (!ret)
1336183256Ssam				return ((l1->sub_list_pos < l2->sub_list_pos) ?
1337183254Ssam				    -1 : +1);
1338183254Ssam			return (ret);
1339183254Ssam		}
1340183254Ssam	}
1341183254Ssam}
1342183254Ssam
1343183254Ssam/*
1344183254Ssam * Swap two array elements
1345183254Ssam */
1346183254Ssamstatic void
1347183254Ssamsub_list_swap(struct sort_list **sl, size_t i1, size_t i2)
1348183254Ssam{
1349183254Ssam	struct sort_list *tmp;
1350183255Ssam
1351183255Ssam	tmp = sl[i1];
1352183257Ssam	sl[i1] = sl[i2];
1353183254Ssam	sl[i2] = tmp;
1354183254Ssam}
1355183254Ssam
1356183254Ssam/* heap algorithm ==>> */
1357183254Ssam
1358183254Ssam/*
1359183254Ssam * See heap sort algorithm
1360183254Ssam * "Raises" last element to its right place
1361183254Ssam */
1362183254Ssamstatic void
1363183254Ssamsub_list_swim(struct sort_list **sl, size_t indx)
1364183254Ssam{
1365183254Ssam
1366183254Ssam	if (indx > 0) {
1367183254Ssam		size_t parent_index;
1368183254Ssam
1369170530Ssam		parent_index = (indx - 1) >> 1;
1370170530Ssam
1371170530Ssam		if (sub_list_cmp(sl[indx], sl[parent_index]) < 0) {
1372170530Ssam			/* swap child and parent and continue */
1373170530Ssam			sub_list_swap(sl, indx, parent_index);
1374178354Ssam			sub_list_swim(sl, parent_index);
1375170530Ssam		}
1376170530Ssam	}
1377170530Ssam}
1378170530Ssam
1379170530Ssam/*
1380170530Ssam * Sink the top element to its correct position
1381170530Ssam */
1382170530Ssamstatic void
1383170530Ssamsub_list_sink(struct sort_list **sl, size_t indx, size_t size)
1384170530Ssam{
1385170530Ssam	size_t left_child_index;
1386170530Ssam	size_t right_child_index;
1387170530Ssam
1388170530Ssam	left_child_index = indx + indx + 1;
1389178354Ssam	right_child_index = left_child_index + 1;
1390170530Ssam
1391170530Ssam	if (left_child_index < size) {
1392170530Ssam		size_t min_child_index;
1393178354Ssam
1394170530Ssam		min_child_index = left_child_index;
1395170530Ssam
1396170530Ssam		if ((right_child_index < size) &&
1397170530Ssam		    (sub_list_cmp(sl[left_child_index],
1398170530Ssam		    sl[right_child_index]) > 0))
1399170530Ssam			min_child_index = right_child_index;
1400170530Ssam		if (sub_list_cmp(sl[indx], sl[min_child_index]) > 0) {
1401170530Ssam			sub_list_swap(sl, indx, min_child_index);
1402170530Ssam			sub_list_sink(sl, min_child_index, size);
1403170530Ssam		}
1404170530Ssam	}
1405170530Ssam}
1406170530Ssam
1407170530Ssam/* <<== heap algorithm */
1408170530Ssam
1409170530Ssam/*
1410170530Ssam * Adds element to the "right" end
1411170530Ssam */
1412170530Ssamstatic void
1413170530Ssamsub_list_push(struct sort_list *s, struct sort_list **sl, size_t size)
1414170530Ssam{
1415170530Ssam
1416170530Ssam	sl[size++] = s;
1417170530Ssam	sub_list_swim(sl, size - 1);
1418178354Ssam}
1419170530Ssam
1420170530Ssamstruct last_printed_item
1421170530Ssam{
1422170530Ssam	struct sort_list_item *item;
1423170530Ssam};
1424170530Ssam
1425170530Ssam/*
1426170530Ssam * Prints the current line of the file
1427170530Ssam */
1428170530Ssamstatic void
1429170530Ssamsub_list_header_print(struct sort_list *sl, FILE *f_out,
1430170530Ssam    struct last_printed_item *lp)
1431170530Ssam{
1432170530Ssam
1433184280Ssam	if (sl && sl->count && f_out && sl->list[0]->str) {
1434184280Ssam		if (sort_opts_vals.uflag) {
1435184280Ssam			if ((lp->item == NULL) || (list_coll(&(lp->item),
1436184280Ssam			    &(sl->list[0])))) {
1437184280Ssam				bwsfwrite(sl->list[0]->str, f_out,
1438184280Ssam				    sort_opts_vals.zflag);
1439184280Ssam				lp->item = sl->list[0];
1440184280Ssam			}
1441184280Ssam		} else
1442184280Ssam			bwsfwrite(sl->list[0]->str, f_out,
1443184280Ssam			    sort_opts_vals.zflag);
1444184280Ssam	}
1445184280Ssam}
1446184280Ssam
1447184280Ssam/*
1448184280Ssam * Read next line
1449184280Ssam */
1450184280Ssamstatic void
1451184280Ssamsub_list_next(struct sort_list *sl)
1452184280Ssam{
1453184280Ssam
1454184280Ssam	if (sl && sl->count) {
1455184280Ssam		sl->list += 1;
1456184280Ssam		sl->count -= 1;
1457184280Ssam	}
1458184280Ssam}
1459184280Ssam
1460184280Ssam/*
1461184280Ssam * Merge sub-lists to a file
1462184280Ssam */
1463184280Ssamstatic void
1464184280Ssammerge_sub_lists(struct sort_list **sl, size_t n, FILE* f_out)
1465170530Ssam{
1466170530Ssam	struct last_printed_item lp;
1467170530Ssam	size_t i;
1468170530Ssam
1469170530Ssam	memset(&lp,0,sizeof(lp));
1470170530Ssam
1471170530Ssam	/* construct the initial list: */
1472170530Ssam	for (i = 0; i < n; i++)
1473170530Ssam		sub_list_push(sl[i], sl, i);
1474170530Ssam
1475170530Ssam	while (sl[0]->count) { /* unfinished lists are always in front */
1476170530Ssam		/* output the smallest line: */
1477170530Ssam		sub_list_header_print(sl[0], f_out, &lp);
1478178354Ssam		/* move to a new line, if possible: */
1479170530Ssam		sub_list_next(sl[0]);
1480170530Ssam		/* re-arrange the list: */
1481178354Ssam		sub_list_sink(sl, 0, n);
1482170530Ssam	}
1483170530Ssam}
1484170530Ssam
1485170530Ssam/*
1486170530Ssam * Merge sub-lists to a file
1487170530Ssam */
1488170530Ssamstatic void
1489170530Ssammerge_list_parts(struct sort_list **parts, size_t n, const char *fn)
1490170530Ssam{
1491170530Ssam	FILE* f_out;
1492170530Ssam
1493170530Ssam	f_out = openfile(fn,"w");
1494170530Ssam
1495170530Ssam	merge_sub_lists(parts, n, f_out);
1496170530Ssam
1497170530Ssam	closefile(f_out, fn);
1498170530Ssam}
1499170530Ssam
1500170530Ssam#endif /* defined(SORT_THREADS) */
1501170530Ssam/*
1502170530Ssam * Multi-threaded sort algorithm "driver"
1503170530Ssam */
1504170530Ssamstatic void
1505170530Ssammt_sort(struct sort_list *list,
1506170530Ssam    int(*sort_func)(void *, size_t, size_t, int(*)(const void *, const void *)),
1507170530Ssam    const char* fn)
1508170530Ssam{
1509170530Ssam#if defined(SORT_THREADS)
1510170530Ssam	if (nthreads < 2 || list->count < MT_SORT_THRESHOLD) {
1511170530Ssam		size_t nthreads_save = nthreads;
1512170530Ssam		nthreads = 1;
1513170530Ssam#endif
1514170530Ssam		/* if single thread or small data, do simple sort */
1515170530Ssam		sort_func(list->list, list->count,
1516170530Ssam		    sizeof(struct sort_list_item *),
1517170530Ssam		    (int(*)(const void *, const void *)) list_coll);
1518170530Ssam		sort_list_dump(list, fn);
1519170530Ssam#if defined(SORT_THREADS)
1520170530Ssam		nthreads = nthreads_save;
1521170530Ssam	} else {
1522170530Ssam		/* multi-threaded sort */
1523170530Ssam		struct sort_list **parts;
1524170530Ssam		size_t avgsize, cstart, i;
1525170530Ssam
1526184280Ssam		/* array of sub-lists */
1527170530Ssam		parts = sort_malloc(sizeof(struct sort_list*) * nthreads);
1528170530Ssam		cstart = 0;
1529170530Ssam		avgsize = list->count / nthreads;
1530170530Ssam
1531170530Ssam		/* set global system sort function */
1532170530Ssam		g_sort_func = sort_func;
1533170530Ssam
1534170530Ssam		/* set sublists */
1535184280Ssam		for (i = 0; i < nthreads; ++i) {
1536184280Ssam			size_t sz = 0;
1537170530Ssam
1538184280Ssam			parts[i] = sort_malloc(sizeof(struct sort_list));
1539173273Ssam			parts[i]->list = list->list + cstart;
1540173273Ssam			parts[i]->memsize = 0;
1541173273Ssam			parts[i]->sub_list_pos = i;
1542170530Ssam
1543170530Ssam			sz = (i == nthreads - 1) ? list->count - cstart :
1544170530Ssam			    avgsize;
1545170530Ssam
1546170530Ssam			parts[i]->count = sz;
1547170530Ssam
1548170530Ssam			parts[i]->size = parts[i]->count;
1549170530Ssam
1550170530Ssam			cstart += sz;
1551170530Ssam		}
1552170530Ssam
1553170530Ssam		/* init threads counting semaphore */
1554170530Ssam		sem_init(&mtsem, 0, 0);
1555170530Ssam
1556182830Ssam		/* start threads */
1557170530Ssam		for (i = 0; i < nthreads; ++i) {
1558170530Ssam			pthread_t pth;
1559170530Ssam			pthread_attr_t attr;
1560170530Ssam
1561170530Ssam			pthread_attr_init(&attr);
1562170530Ssam			pthread_attr_setdetachstate(&attr, PTHREAD_DETACHED);
1563170530Ssam
1564170530Ssam			for (;;) {
1565170530Ssam				int res = pthread_create(&pth, &attr,
1566170530Ssam				    mt_sort_thread, parts[i]);
1567170530Ssam
1568170530Ssam				if (res >= 0)
1569170530Ssam					break;
1570170530Ssam				if (errno == EAGAIN) {
1571170530Ssam					pthread_yield();
1572170530Ssam					continue;
1573178354Ssam				}
1574170530Ssam				err(2, NULL);
1575170530Ssam			}
1576170530Ssam
1577182829Ssam			pthread_attr_destroy(&attr);
1578170530Ssam		}
1579170530Ssam
1580170530Ssam		/* wait for threads completion */
1581170530Ssam		for (i = 0; i < nthreads; ++i) {
1582170530Ssam			sem_wait(&mtsem);
1583170530Ssam		}
1584170530Ssam		/* destroy the semaphore - we do not need it anymore */
1585170530Ssam		sem_destroy(&mtsem);
1586170530Ssam
1587170530Ssam		/* merge sorted sub-lists to the file */
1588170530Ssam		merge_list_parts(parts, nthreads, fn);
1589170530Ssam
1590170530Ssam		/* free sub-lists data */
1591170530Ssam		for (i = 0; i < nthreads; ++i) {
1592170530Ssam			sort_free(parts[i]);
1593170530Ssam		}
1594178354Ssam		sort_free(parts);
1595170530Ssam	}
1596170530Ssam#endif /* defined(SORT_THREADS) */
1597170530Ssam}
1598173273Ssam