/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 1998-2003 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

27#ifndef	_SORT_TYPES_H
28#define	_SORT_TYPES_H
29
30#pragma ident	"%Z%%M%	%I%	%E% SMI"
31
32#ifdef	__cplusplus
33extern "C" {
34#endif
35
36#include <sys/resource.h>
37#include <sys/types.h>
38#include <limits.h>
39#include <stdio.h>
40#include <stdlib.h>
41
/*
 * flag_t is the integer type this header uses for option words and status
 * flags (for example f_options in field_t and s_status in stream_t).
 */
typedef	int flag_t;

/*
 * cmp_fcn_t is a comparison callback taking two untyped pointers and a
 * flag_t, and returning an int.  The meaning of the return value and of the
 * flag argument is set by the implementations, which are not part of this
 * header.
 */
typedef	int (*cmp_fcn_t)(void *, void *, flag_t);

46typedef union vchar {
47	char	sc;
48	uchar_t	usc;
49	wchar_t	wc;
50} vchar_t;
51
52typedef union vcharptr {
53	char	*sp;
54	uchar_t	*usp;
55	wchar_t *wp;
56} vcharptr_t;
57
58typedef struct line_rec {
59	vcharptr_t l_data;		/* raw data */
60	vcharptr_t l_raw_collate;	/* collatable raw data */
61	vcharptr_t l_collate;		/* key-ordered collatable string */
62	ssize_t	l_data_length;
63	ssize_t	l_collate_length;
64	ssize_t	l_collate_bufsize;
65} line_rec_t;
66
/*
 * Kinds of field.  A value of this type is stored in field_t.f_species and
 * in sort_t.m_default_species.
 */
enum field_species {
	ALPHA,
	MONTH,
	NUMERIC
};

/*
 * Field option bits.  Each constant occupies its own bit, so any combination
 * can be OR'ed together into a single flag_t word (presumably
 * field_t.f_options and sort_t.m_field_options -- confirm against the users).
 */
#define	FIELD_DICTIONARY_ORDER		0x1
#define	FIELD_FOLD_UPPERCASE		0x2
#define	FIELD_IGNORE_NONPRINTABLES	0x4
#define	FIELD_IGNORE_BLANKS_START	0x8
#define	FIELD_IGNORE_BLANKS_END		0x10

#define	FIELD_REVERSE_COMPARISONS	0x20

#define	FIELD_MODIFIERS_DEFINED		0x40

83typedef struct field {
84	struct field		*f_next;
85
86	/*
87	 * field ops vector
88	 */
89	ssize_t			(*f_convert)(struct field *, line_rec_t *,
90	    vchar_t, ssize_t, ssize_t, ssize_t);
91	enum field_species	f_species;
92
93	/*
94	 * starting and ending fields, and offsets
95	 */
96	int			f_start_field;
97	ssize_t			f_start_offset;
98
99	int			f_end_field;
100	ssize_t			f_end_offset;
101
102	flag_t			f_options;
103} field_t;
104
/*
 * Stream flag bits.  The low four bits identify the stream's source;
 * STREAM_SOURCE_MASK is exactly the OR of STREAM_ARRAY, STREAM_MMAP,
 * STREAM_SINGLE, and STREAM_WIDE, and STREAM_NO_SOURCE is the value with
 * none of them set.
 */
#define	STREAM_SOURCE_MASK	0x000f
#define	STREAM_NO_SOURCE	0x0000
#define	STREAM_ARRAY		0x0001
#define	STREAM_MMAP		0x0002
#define	STREAM_SINGLE		0x0004
#define	STREAM_WIDE		0x0008

/*
 * The remaining bits are independent flags, each occupying its own bit above
 * the source bits.
 */
#define	STREAM_OPEN		0x0010
#define	STREAM_PRIMED		0x0020

#define	STREAM_OUTPUT		0x0040
#define	STREAM_EOS_REACHED	0x0080
#define	STREAM_NOTFILE		0x0100
#define	STREAM_UNIQUE		0x0200
#define	STREAM_INSTANT		0x0400
#define	STREAM_TEMPORARY	0x0800
#define	STREAM_NOT_FREEABLE	0x1000

/*
 * Default sizes.  MEGABYTE is not defined in this header, so it must be
 * supplied by the including source before either macro is expanded.
 */
#define	DEFAULT_INPUT_SIZE	(1 * MEGABYTE)
#define	DEFAULT_RELEASE_SIZE	(MEGABYTE / 2)

/*
 * Line-size estimates: WCHAR_AVG_LINE is CHAR_AVG_LINE scaled by
 * sizeof (wchar_t).
 */
#define	CHAR_AVG_LINE	32
#define	WCHAR_AVG_LINE	(sizeof (wchar_t) * CHAR_AVG_LINE)
#define	XFRM_MULTIPLIER	8

/* Result codes; presumably for the line-fetch path -- confirm with users. */
#define	NEXT_LINE_COMPLETE	0x0
#define	NEXT_LINE_INCOMPLETE	0x1

/* Result codes; presumably for the sop_prime operation -- confirm. */
#define	PRIME_SUCCEEDED		0x0
#define	PRIME_FAILED_EMPTY_FILE	0x1
#define	PRIME_FAILED		0x2

137typedef struct stream_array {
138	line_rec_t	**s_array;
139	ssize_t		s_array_size;
140	ssize_t		s_cur_index;
141} stream_array_t;
142
143typedef struct stream_simple_file {
144	/*
145	 * stream_simple_file_t is used for STREAM_MMAP and for STREAM_OUTPUT
146	 * for either single- (STREAM_SINGLE | STREAM_OUTPUT) or multi-byte
147	 * (STREAM_WIDE | STREAM_OUTPUT) locales.
148	 */
149	int		s_fd;			/* file descriptor */
150	caddr_t		s_release_origin;	/* start for next madvise(3C) */
151} stream_simple_file_t;
152
typedef struct stream_buffered_file {
	/*
	 * stream_buffered_file_t holds the state for a file accessed via
	 * stdio; it is the BF member of stream_type_t.
	 *
	 * NOTE(review): this comment formerly named STREAM_STDIO and
	 * STREAM_WIDE as the users of this structure, but no STREAM_STDIO
	 * flag is defined in this header; presumably STREAM_SINGLE was
	 * meant -- confirm.
	 */
	FILE		*s_fp;			/* file stream */
	void		*s_vbuf;		/* stdio alternate buffer */
	size_t		s_bytes_used;
} stream_buffered_file_t;

163typedef union stream_type {
164	stream_array_t		LA;	/* array of line records */
165	stream_simple_file_t	SF;	/* file accessed via mmap */
166	stream_buffered_file_t	BF;	/* file accessed via stdio */
167} stream_type_t;
168
169struct stream;
170
171typedef struct stream_ops {
172	int	(*sop_is_closable)(struct stream *);
173	int	(*sop_close)(struct stream *);
174	int	(*sop_eos)(struct stream *);
175	ssize_t	(*sop_fetch)(struct stream *);
176	void	(*sop_flush)(struct stream *);
177	int	(*sop_free)(struct stream *);
178	int	(*sop_open_for_write)(struct stream *);
179	int	(*sop_prime)(struct stream *);
180	void	(*sop_put_line)(struct stream *, line_rec_t *);
181	void	(*sop_release_line)(struct stream *);
182	void	(*sop_send_eol)(struct stream *);
183	int	(*sop_unlink)(struct stream *);
184} stream_ops_t;
185
/*
 * Dispatch macros for the stream ops vector.  Each expands to a call of the
 * matching s_ops function pointer with the stream itself as first argument.
 * The whole expansion is parenthesized so it is a single primary expression.
 * Note that (s) is evaluated twice, so it must not have side effects.
 */
#define	SOP_IS_CLOSABLE(s)	(((s)->s_ops.sop_is_closable)(s))
#define	SOP_CLOSE(s)		(((s)->s_ops.sop_close)(s))
#define	SOP_EOS(s)		(((s)->s_ops.sop_eos)(s))
#define	SOP_FETCH(s)		(((s)->s_ops.sop_fetch)(s))
#define	SOP_FLUSH(s)		(((s)->s_ops.sop_flush)(s))
#define	SOP_FREE(s)		(((s)->s_ops.sop_free)(s))
#define	SOP_OPEN_FOR_WRITE(s)	(((s)->s_ops.sop_open_for_write)(s))
#define	SOP_PRIME(s)		(((s)->s_ops.sop_prime)(s))
#define	SOP_PUT_LINE(s, l)	(((s)->s_ops.sop_put_line)((s), (l)))
#define	SOP_RELEASE_LINE(s)	(((s)->s_ops.sop_release_line)(s))
#define	SOP_SEND_EOL(s)		(((s)->s_ops.sop_send_eol)(s))
#define	SOP_UNLINK(s)		(((s)->s_ops.sop_unlink)(s))

199/*
200 * The stream_t type is provided to simplify access to files, particularly for
201 * external merges.
202 */
203typedef struct stream {
204	struct stream	*s_consumer;	/* dependent on s_buffer */
205	struct stream	*s_previous;
206	struct stream	*s_next;
207
208	char		*s_filename;
209
210	line_rec_t	s_current;	/* present line buffers */
211	stream_ops_t	s_ops;		/* type-specific ops vector */
212	stream_type_t	s_type;		/* type-specific attributes */
213
214	void		*s_buffer;
215	size_t		s_buffer_size;
216	off_t		s_filesize;
217	size_t		s_element_size;
218	flag_t		s_status;	/* flags */
219	ino_t		s_ino;
220	dev_t		s_dev;
221} stream_t;
222
223/*
224 * sort(1) has, for debugging purposes, a primitive compile-time option to
225 * generate statistics of various operations executed during an invocation.
226 * These statistics are recorded in the following sort_statistics_t structure.
227 */
228typedef struct sort_statistics {
229	u_longlong_t	st_avail_mem;
230	u_longlong_t	st_convert_reallocs;
231	u_longlong_t	st_fetched_lines;
232	u_longlong_t	st_insert_full_down;
233	u_longlong_t	st_insert_full_input;
234	u_longlong_t	st_insert_full_up;
235	u_longlong_t	st_line_conversions;
236	u_longlong_t	st_not_unique_lines;
237	u_longlong_t	st_put_lines;
238	u_longlong_t	st_put_temp_lines_internal;
239	u_longlong_t	st_put_temp_lines_merge;
240	u_longlong_t	st_put_unique_lines;
241	u_longlong_t	st_shelved_lines;
242	u_longlong_t	st_subfiles;		/* number of insertion sorts */
243	u_longlong_t	st_swaps;
244	u_longlong_t	st_tqs_calls;
245
246	uint_t		st_input_files;
247	uint_t		st_merge_files;
248} sort_statistics_t;
249
250typedef struct sort {
251	stream_t	*m_input_streams;
252	char		*m_output_filename;
253
254	stream_t	*m_temporary_streams;
255	char		*m_tmpdir_template;
256
257	field_t		*m_fields_head;
258
259	cmp_fcn_t	m_compare_fn;
260	ssize_t		(*m_coll_convert)(field_t *, line_rec_t *, flag_t,
261	    vchar_t);
262
263	sort_statistics_t *m_stats;
264	size_t		m_memory_limit;
265	size_t		m_memory_available;
266
267	flag_t		m_check_if_sorted_only;
268	flag_t		m_merge_only;
269	flag_t		m_unique_lines;
270	flag_t		m_entire_line;
271
272	enum field_species m_default_species;
273	flag_t		m_field_options;
274	vchar_t		m_field_separator;
275
276	flag_t		m_c_locale;
277	flag_t		m_single_byte_locale;
278	flag_t		m_input_from_stdin;
279	flag_t		m_output_to_stdout;
280	flag_t		m_verbose;
281} sort_t;
282
283#ifdef	__cplusplus
284}
285#endif
286
287#endif	/* _SORT_TYPES_H */
288