/*	$NetBSD: sort.h,v 1.33 2010/12/18 23:09:48 christos Exp $	*/

/*-
 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Ben Harris and Jaromir Dolecek.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Peter McIlroy.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sort.h	8.1 (Berkeley) 6/6/93
 */

#include <sys/param.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Size of the per-byte lookup tables declared below (ascii[], d_mask[], ...). */
#define NBINS		256

/* values for masks, weights, and other flags. */
/* R and F get used to index weight_tables[] */
#define	R	0x01	/* Field is reversed */
#define	F	0x02	/* weight lower and upper case the same */
#define	I	0x04	/* mask out non-printable characters */
#define	D	0x08	/* sort alphanumeric characters only */
#define	N	0x10	/* Field is a number */
#define	BI	0x20	/* ignore blanks in icol */
#define	BT	0x40	/* ignore blanks in tcol */
#define	L	0x80	/* Sort by field length */

/* masks for delimiters: blanks, fields, and termination. */
#define BLANK 1		/* ' ', '\t'; '\n' if -R is invoked */
#define FLD_D 2		/* ' ', '\t' default; from -t otherwise */
#define REC_D_F 4	/* '\n' default; from -R otherwise */

/*
 * Smaller / larger of two values.  Each argument is expanded twice, so do
 * not pass expressions with side effects.
 */
#define min(a, b) ((a) < (b) ? (a) : (b))
#define max(a, b) ((a) > (b) ? (a) : (b))

/*
 * Close a stdio stream; if fclose() returns EOF, report through err() with
 * exit status 2, printing the stream pointer.
 * The expansion is a braced block (kept for compatibility with existing
 * call sites) and `file` is expanded more than once, so pass a plain
 * variable.  The pointer is cast because "%p" requires a void *.
 */
#define	FCLOSE(file) {							\
	if (EOF == fclose(file))					\
		err(2, "%p", (void *)(file));				\
}

/*
 * fwrite() wrapper that exits through err(2, NULL) unless every requested
 * item was written.  The item count is evaluated once and compared with
 * fwrite()'s return value, so a short write is treated as an error and a
 * request for zero items is not.
 * Expands to a braced block (kept for compatibility with existing call
 * sites).
 */
#define	EWRITE(ptr, size, n, f) {					\
	size_t ewrite_count_ = (n);					\
	if (fwrite((ptr), (size), ewrite_count_, (f)) != ewrite_count_)	\
		 err(2, NULL);						\
}

/* Records are limited to MAXBUFSIZE (8MB) and less if you want to sort
 * in a sane way.
 * Anyone who wants to sort data records longer than 2GB definitely needs a
 * different program! */
typedef unsigned int length_t;

114/* A record is a key/line pair starting at rec.data. It has a total length
115 * and an offset to the start of the line half of the pair.
116 */
117typedef struct recheader {
118	length_t length;	/* total length of key and line */
119	length_t offset;	/* to line */
120	int      keylen;	/* length of key */
121	u_char   data[];	/* key then line */
122} RECHEADER;
123
/* This is the column as seen by struct field.  It is used by enterfield.
 * They are matched with corresponding coldescs during initialization.
 */
struct column {
	struct coldesc *p;
	int num;
	int indent;
};

133/* a coldesc has a number and pointers to the beginning and end of the
134 * corresponding column in the current line.  This is determined in enterkey.
135 */
136typedef struct coldesc {
137	u_char *start;
138	u_char *end;
139	int num;
140} COLDESC;
141
142/* A field has an initial and final column; an omitted final column
143 * implies the end of the line.  Flags regulate omission of blanks and
144 * numerical sorts; mask determines which characters are ignored (from -i, -d);
145 * weights determines the sort weights of a character (from -f, -r).
146 *
147 * The first field contain the global flags etc.
148 * The list terminates when icol = 0.
149 */
150struct field {
151	struct column icol;
152	struct column tcol;
153	u_int flags;
154	u_char *mask;
155	u_char *weights;
156};
157
/* Wrapper around a read-only array of read-only file name strings. */
struct filelist {
	const char * const * names;
};

162typedef int (*get_func_t)(FILE *, RECHEADER *, u_char *, struct field *);
163typedef void (*put_func_t)(const RECHEADER *, FILE *);
164
165extern u_char ascii[NBINS], Rascii[NBINS], Ftable[NBINS], RFtable[NBINS];
166extern u_char *const weight_tables[4];   /* ascii, Rascii, Ftable, RFtable */
167extern u_char d_mask[NBINS];
168extern int SINGL_FLD, SEP_FLAG, UNIQUE, REVERSE;
169extern int posix_sort;
170extern int REC_D;
171extern const char *tmpdir;
172extern struct coldesc *clist;
173extern int ncols;
174
/*
 * Test the bit of debug_flags selected by the low five bits of ch.
 * The shifted constant is unsigned so that selecting bit 31 does not
 * overflow a signed int.
 */
#define DEBUG(ch) (debug_flags & (1U << ((ch) & 31)))
extern unsigned int debug_flags;

178RECHEADER *allocrec(RECHEADER *, size_t);
179void	 append(RECHEADER **, int, FILE *, void (*)(const RECHEADER *, FILE *));
180void	 concat(FILE *, FILE *);
181length_t enterkey(RECHEADER *, const u_char *, u_char *, size_t, struct field *);
182void	 fixit(int *, char **, const char *);
183void	 fldreset(struct field *);
184FILE	*ftmp(void);
185void	 fmerge(struct filelist *, int, FILE *, struct field *);
186void	 save_for_merge(FILE *, get_func_t, struct field *);
187void	 merge_sort(FILE *, put_func_t, struct field *);
188void	 fsort(struct filelist *, int, FILE *, struct field *);
189int	 geteasy(FILE *, RECHEADER *, u_char *, struct field *);
190int	 makekey(FILE *, RECHEADER *, u_char *, struct field *);
191int	 makeline(FILE *, RECHEADER *, u_char *, struct field *);
192void	 makeline_copydown(RECHEADER *);
193int	 optval(int, int);
194__dead void	 order(struct filelist *, struct field *);
195void	 putline(const RECHEADER *, FILE *);
196void	 putrec(const RECHEADER *, FILE *);
197void	 putkeydump(const RECHEADER *, FILE *);
198void	 rd_append(int, int, int, FILE *, u_char *, u_char *);
199void	 radix_sort(RECHEADER **, RECHEADER **, int);
200int	 setfield(const char *, struct field *, int);
201void	 settables(void);
202