tsort.c revision 87301
118316Swollman/*
218316Swollman * Copyright (c) 1989, 1993, 1994
318316Swollman *	The Regents of the University of California.  All rights reserved.
418316Swollman *
518316Swollman * This code is derived from software contributed to Berkeley by
618316Swollman * Michael Rendell of Memorial University of Newfoundland.
718316Swollman *
818316Swollman * Redistribution and use in source and binary forms, with or without
918316Swollman * modification, are permitted provided that the following conditions
1018316Swollman * are met:
1118316Swollman * 1. Redistributions of source code must retain the above copyright
1218316Swollman *    notice, this list of conditions and the following disclaimer.
1318316Swollman * 2. Redistributions in binary form must reproduce the above copyright
1446303Smarkm *    notice, this list of conditions and the following disclaimer in the
1518316Swollman *    documentation and/or other materials provided with the distribution.
1618316Swollman * 3. All advertising materials mentioning features or use of this software
1718316Swollman *    must display the following acknowledgement:
1818316Swollman *	This product includes software developed by the University of
1918316Swollman *	California, Berkeley and its contributors.
2018316Swollman * 4. Neither the name of the University nor the names of its contributors
2118316Swollman *    may be used to endorse or promote products derived from this software
2218316Swollman *    without specific prior written permission.
2318316Swollman *
2418316Swollman * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2518316Swollman * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2618316Swollman * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2718316Swollman * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2818316Swollman * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2918316Swollman * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3018316Swollman * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3118316Swollman * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3246303Smarkm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3350476Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3418316Swollman * SUCH DAMAGE.
3518316Swollman *
3618316Swollman * $FreeBSD: head/usr.bin/tsort/tsort.c 87301 2001-12-03 21:32:54Z dwmalone $
3718316Swollman */
3818316Swollman
3918316Swollman#ifndef lint
4046303Smarkmstatic const char copyright[] =
4118316Swollman"@(#) Copyright (c) 1989, 1993, 1994\n\
4218316Swollman	The Regents of the University of California.  All rights reserved.\n";
4346303Smarkm#endif /* not lint */
4446303Smarkm
4546303Smarkm#ifndef lint
4646303Smarkmstatic const char sccsid[] = "@(#)tsort.c	8.3 (Berkeley) 5/4/95";
4746303Smarkm#endif /* not lint */
4850969Speter
4946303Smarkm#include <sys/types.h>
5046303Smarkm
5118316Swollman#include <ctype.h>
52102231Strhodes#include <db.h>
5318316Swollman#include <err.h>
5418316Swollman#include <errno.h>
5518316Swollman#include <fcntl.h>
5618316Swollman#include <stdio.h>
5718316Swollman#include <stdlib.h>
5820339Swollman#include <string.h>
5981604Speter#include <unistd.h>
6046303Smarkm
61118582Simp/*
62118582Simp *  Topological sort.  Input is a list of pairs of strings separated by
6320339Swollman *  white space (spaces, tabs, and/or newlines); strings are written to
6418316Swollman *  standard output in sorted order, one per line.
6518316Swollman *
6646303Smarkm *  usage:
6718316Swollman *     tsort [-dlq] [inputfile]
6818316Swollman *  If no input file is specified, standard input is read.
6919880Swollman *
7019880Swollman *  Should be compatible with AT&T tsort HOWEVER the output is not identical
7119880Swollman *  (i.e. for most graphs there is more than one sorted order, and this tsort
7219880Swollman *  usually generates a different one then the AT&T tsort).  Also, cycle
7319880Swollman *  reporting seems to be more accurate in this version (the AT&T tsort
7419880Swollman *  sometimes says a node is in a cycle when it isn't).
7519880Swollman *
7619880Swollman *  Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90
7719880Swollman */
7819880Swollman#define	HASHSIZE	53		/* doesn't need to be big */
7919880Swollman#define	NF_MARK		0x1		/* marker for cycle detection */
8019880Swollman#define	NF_ACYCLIC	0x2		/* this node is cycle free */
8119880Swollman#define	NF_NODEST	0x4		/* Unreachable */
8219880Swollman
8319880Swollman
8419880Swollmantypedef struct node_str NODE;
8519880Swollman
8619880Swollmanstruct node_str {
8719880Swollman	NODE **n_prevp;			/* pointer to previous node's n_next */
8819880Swollman	NODE *n_next;			/* next node in graph */
8919880Swollman	NODE **n_arcs;			/* array of arcs to other nodes */
9019880Swollman	int n_narcs;			/* number of arcs in n_arcs[] */
9119880Swollman	int n_arcsize;			/* size of n_arcs[] array */
9219880Swollman	int n_refcnt;			/* # of arcs pointing to this node */
9319880Swollman	int n_flags;			/* NF_* */
9419880Swollman	char n_name[1];			/* name of this node */
9519880Swollman};
9619880Swollman
9719880Swollmantypedef struct _buf {
9819880Swollman	char *b_buf;
9919880Swollman	int b_bsize;
10019880Swollman} BUF;
10119880Swollman
10219880SwollmanDB *db;
10319880SwollmanNODE *graph, **cycle_buf, **longest_cycle;
10419880Swollmanint debug, longest, quiet;
10519880Swollman
10619880Swollmanvoid	 add_arc __P((char *, char *));
10719880Swollmanint	 find_cycle __P((NODE *, NODE *, int, int));
10819880SwollmanNODE	*get_node __P((char *));
10919880Swollmanvoid	*grow_buf __P((void *, int));
11019880Swollmanvoid	 remove_node __P((NODE *));
11119880Swollmanvoid	 clear_cycle __P((void));
11219880Swollmanvoid	 tsort __P((void));
11319880Swollmanvoid	 usage __P((void));
11419880Swollman
11519880Swollmanint
11619880Swollmanmain(argc, argv)
11719880Swollman	int argc;
11819880Swollman	char *argv[];
11919880Swollman{
12019880Swollman	register BUF *b;
12119880Swollman	register int c, n;
12218316Swollman	FILE *fp;
12318316Swollman	int bsize, ch, nused;
12418316Swollman	BUF bufs[2];
12518316Swollman
12618316Swollman	while ((ch = getopt(argc, argv, "dlq")) != -1)
12718316Swollman		switch (ch) {
12818316Swollman		case 'd':
12918316Swollman			debug = 1;
13018316Swollman			break;
13118316Swollman		case 'l':
13218316Swollman			longest = 1;
13318316Swollman			break;
13418316Swollman		case 'q':
13518316Swollman			quiet = 1;
13618316Swollman			break;
13718316Swollman		case '?':
13818316Swollman		default:
13918316Swollman			usage();
14018316Swollman		}
14118316Swollman	argc -= optind;
14218316Swollman	argv += optind;
14346303Smarkm
14418316Swollman	switch (argc) {
14518316Swollman	case 0:
14618316Swollman		fp = stdin;
14718316Swollman		break;
14818316Swollman	case 1:
14918316Swollman		if ((fp = fopen(*argv, "r")) == NULL)
15018316Swollman			err(1, "%s", *argv);
15118316Swollman		break;
15218316Swollman	default:
15318316Swollman		usage();
15418316Swollman	}
15518316Swollman
15618316Swollman	for (b = bufs, n = 2; --n >= 0; b++)
15718316Swollman		b->b_buf = grow_buf(NULL, b->b_bsize = 1024);
15846303Smarkm
15918316Swollman	/* parse input and build the graph */
16018316Swollman	for (n = 0, c = getc(fp);;) {
16118316Swollman		while (c != EOF && isspace(c))
16218316Swollman			c = getc(fp);
16318316Swollman		if (c == EOF)
16418316Swollman			break;
16518316Swollman
16618316Swollman		nused = 0;
16718316Swollman		b = &bufs[n];
16818316Swollman		bsize = b->b_bsize;
16920339Swollman		do {
17020339Swollman			b->b_buf[nused++] = c;
17118316Swollman			if (nused == bsize)
17218316Swollman				b->b_buf = grow_buf(b->b_buf, bsize *= 2);
17318316Swollman			c = getc(fp);
17420339Swollman		} while (c != EOF && !isspace(c));
17520339Swollman
17618316Swollman		b->b_buf[nused] = '\0';
17720339Swollman		b->b_bsize = bsize;
17818316Swollman		if (n)
17918316Swollman			add_arc(bufs[0].b_buf, bufs[1].b_buf);
18018316Swollman		n = !n;
18118316Swollman	}
18218316Swollman	(void)fclose(fp);
18346303Smarkm	if (n)
18418316Swollman		errx(1, "odd data count");
18518316Swollman
18618316Swollman	/* do the sort */
18718316Swollman	tsort();
18818316Swollman	exit(0);
18918316Swollman}
19018316Swollman
19146303Smarkm/* double the size of oldbuf and return a pointer to the new buffer. */
19218316Swollmanvoid *
19318316Swollmangrow_buf(bp, size)
19418316Swollman	void *bp;
19518316Swollman	int size;
19618316Swollman{
19746303Smarkm	if ((bp = realloc(bp, (u_int)size)) == NULL)
19846303Smarkm		err(1, NULL);
19918316Swollman	return (bp);
20018316Swollman}
20118316Swollman
20218316Swollman/*
20318316Swollman * add an arc from node s1 to node s2 in the graph.  If s1 or s2 are not in
20418316Swollman * the graph, then add them.
20518316Swollman */
20646303Smarkmvoid
20720339Swollmanadd_arc(s1, s2)
20820339Swollman	char *s1, *s2;
20920339Swollman{
21018316Swollman	register NODE *n1;
21146303Smarkm	NODE *n2;
21246303Smarkm	int bsize, i;
21346303Smarkm
21446303Smarkm	n1 = get_node(s1);
21546303Smarkm
21646303Smarkm	if (!strcmp(s1, s2))
21718316Swollman		return;
21818316Swollman
21920339Swollman	n2 = get_node(s2);
22018316Swollman
22118316Swollman	/*
22218316Swollman	 * Check if this arc is already here.
22318316Swollman	 */
22418316Swollman	for (i = 0; i < n1->n_narcs; i++)
22518316Swollman		if (n1->n_arcs[i] == n2)
22618316Swollman			return;
22718316Swollman	/*
22818316Swollman	 * Add it.
22946303Smarkm	 */
23018316Swollman	if (n1->n_narcs == n1->n_arcsize) {
23118316Swollman		if (!n1->n_arcsize)
23218316Swollman			n1->n_arcsize = 10;
23318316Swollman		bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
23418316Swollman		n1->n_arcs = grow_buf(n1->n_arcs, bsize);
23546303Smarkm		n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
23618316Swollman	}
23718316Swollman	n1->n_arcs[n1->n_narcs++] = n2;
23818316Swollman	++n2->n_refcnt;
23918316Swollman}
24018316Swollman
24118316Swollman/* Find a node in the graph (insert if not found) and return a pointer to it. */
24218316SwollmanNODE *
24318316Swollmanget_node(name)
24446303Smarkm	char *name;
24518316Swollman{
24646303Smarkm	DBT data, key;
24718316Swollman	NODE *n;
24818316Swollman
24918316Swollman	if (db == NULL &&
25018316Swollman	    (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL)
25118316Swollman		err(1, "db: %s", name);
25220339Swollman
25320339Swollman	key.data = name;
25418316Swollman	key.size = strlen(name) + 1;
25546303Smarkm
25620339Swollman	switch ((*db->get)(db, &key, &data, 0)) {
25718316Swollman	case 0:
25846303Smarkm		bcopy(data.data, &n, sizeof(n));
25920339Swollman		return (n);
26020339Swollman	case 1:
26120339Swollman		break;
26220339Swollman	default:
26320339Swollman	case -1:
26446303Smarkm		err(1, "db: %s", name);
26520339Swollman	}
26620339Swollman
26720339Swollman	if ((n = malloc(sizeof(NODE) + key.size)) == NULL)
26820339Swollman		err(1, NULL);
26920339Swollman
27020339Swollman	n->n_narcs = 0;
27120339Swollman	n->n_arcsize = 0;
27220339Swollman	n->n_arcs = NULL;
27320339Swollman	n->n_refcnt = 0;
27420339Swollman	n->n_flags = 0;
27520339Swollman	bcopy(name, n->n_name, key.size);
27620339Swollman
27720339Swollman	/* Add to linked list. */
27820339Swollman	if ((n->n_next = graph) != NULL)
27920339Swollman		graph->n_prevp = &n->n_next;
28020339Swollman	n->n_prevp = &graph;
28120339Swollman	graph = n;
28220339Swollman
28320339Swollman	/* Add to hash table. */
28420339Swollman	data.data = &n;
28520339Swollman	data.size = sizeof(n);
28620339Swollman	if ((*db->put)(db, &key, &data, 0))
28720339Swollman		err(1, "db: %s", name);
28820339Swollman	return (n);
28920339Swollman}
29020339Swollman
29120339Swollman
29220339Swollman/*
29320339Swollman * Clear the NODEST flag from all nodes.
29420339Swollman */
29520339Swollmanvoid
29620339Swollmanclear_cycle()
29720339Swollman{
29820339Swollman	NODE *n;
29946303Smarkm
30046303Smarkm	for (n = graph; n != NULL; n = n->n_next)
30120339Swollman		n->n_flags &= ~NF_NODEST;
30220339Swollman}
30318316Swollman
30418316Swollman/* do topological sort on graph */
30546303Smarkmvoid
30618316Swollmantsort()
30718316Swollman{
30820339Swollman	register NODE *n, *next;
30920339Swollman	register int cnt, i;
31020339Swollman
31118316Swollman	while (graph != NULL) {
31220339Swollman		/*
31320339Swollman		 * Keep getting rid of simple cases until there are none left,
31420339Swollman		 * if there are any nodes still in the graph, then there is
31520339Swollman		 * a cycle in it.
31620339Swollman		 */
31720339Swollman		do {
31820339Swollman			for (cnt = 0, n = graph; n != NULL; n = next) {
31920339Swollman				next = n->n_next;
32020339Swollman				if (n->n_refcnt == 0) {
32120339Swollman					remove_node(n);
32218316Swollman					++cnt;
32318316Swollman				}
32418316Swollman			}
32518316Swollman		} while (graph != NULL && cnt);
32618316Swollman
32718316Swollman		if (graph == NULL)
32818316Swollman			break;
32920339Swollman
33020339Swollman		if (!cycle_buf) {
33120339Swollman			/*
33220339Swollman			 * Allocate space for two cycle logs - one to be used
33320339Swollman			 * as scratch space, the other to save the longest
33420339Swollman			 * cycle.
33520339Swollman			 */
33620339Swollman			for (cnt = 0, n = graph; n != NULL; n = n->n_next)
33720339Swollman				++cnt;
33820339Swollman			cycle_buf = malloc((u_int)sizeof(NODE *) * cnt);
33920339Swollman			longest_cycle = malloc((u_int)sizeof(NODE *) * cnt);
34020339Swollman			if (cycle_buf == NULL || longest_cycle == NULL)
34120339Swollman				err(1, NULL);
34220339Swollman		}
34320339Swollman		for (n = graph; n != NULL; n = n->n_next)
34420339Swollman			if (!(n->n_flags & NF_ACYCLIC)) {
34520339Swollman				if ((cnt = find_cycle(n, n, 0, 0))) {
34620339Swollman					if (!quiet) {
34746303Smarkm						warnx("cycle in data");
34818316Swollman						for (i = 0; i < cnt; i++)
34918316Swollman							warnx("%s",
35018316Swollman							    longest_cycle[i]->n_name);
35118316Swollman					}
35218316Swollman					remove_node(n);
35320339Swollman					clear_cycle();
35420339Swollman					break;
35520339Swollman				} else {
35620339Swollman					/* to avoid further checks */
35720339Swollman					n->n_flags  |= NF_ACYCLIC;
35820339Swollman					clear_cycle();
35920339Swollman				}
36020339Swollman			}
36120339Swollman
36220339Swollman		if (n == NULL)
36318316Swollman			errx(1, "internal error -- could not find cycle");
36418316Swollman	}
36518316Swollman}
36646303Smarkm
36718316Swollman/* print node and remove from graph (does not actually free node) */
36846303Smarkmvoid
36918316Swollmanremove_node(n)
37020339Swollman	register NODE *n;
37120339Swollman{
37220339Swollman	register NODE **np;
37318316Swollman	register int i;
37420339Swollman
37520339Swollman	(void)printf("%s\n", n->n_name);
37620339Swollman	for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++)
37720339Swollman		--(*np)->n_refcnt;
37820339Swollman	n->n_narcs = 0;
37919880Swollman	*n->n_prevp = n->n_next;
38020339Swollman	if (n->n_next)
38120339Swollman		n->n_next->n_prevp = n->n_prevp;
38220339Swollman}
38318316Swollman
38418316Swollman
38518316Swollman/* look for the longest? cycle from node from to node to. */
38618316Swollmanint
38718316Swollmanfind_cycle(from, to, longest_len, depth)
38846303Smarkm	NODE *from, *to;
38918316Swollman	int depth, longest_len;
39018316Swollman{
39146303Smarkm	register NODE **np;
39218316Swollman	register int i, len;
39318316Swollman
39418316Swollman	/*
39518316Swollman	 * avoid infinite loops and ignore portions of the graph known
39618316Swollman	 * to be acyclic
39746303Smarkm	 */
39818316Swollman	if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))
39918316Swollman		return (0);
40046303Smarkm	from->n_flags |= NF_MARK;
40118316Swollman
40218316Swollman	for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
40318316Swollman		cycle_buf[depth] = *np;
40420339Swollman		if (*np == to) {
40518316Swollman			if (depth + 1 > longest_len) {
40618316Swollman				longest_len = depth + 1;
40720339Swollman				(void)memcpy((char *)longest_cycle,
40818316Swollman				    (char *)cycle_buf,
40920339Swollman				    longest_len * sizeof(NODE *));
41020339Swollman			}
41118316Swollman		} else {
41220339Swollman			if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))
41320339Swollman				continue;
41420339Swollman			len = find_cycle(*np, to, longest_len, depth + 1);
41520339Swollman
41620339Swollman			if (debug)
41720339Swollman				(void)printf("%*s %s->%s %d\n", depth, "",
41820339Swollman				    from->n_name, to->n_name, len);
41920339Swollman
42020339Swollman			if (len == 0)
42120339Swollman				(*np)->n_flags |= NF_NODEST;
42218316Swollman
42318316Swollman			if (len > longest_len)
42420339Swollman				longest_len = len;
42520339Swollman
42618316Swollman			if (len > 0 && !longest)
42718316Swollman				break;
42818316Swollman		}
42918316Swollman	}
43018316Swollman	from->n_flags &= ~NF_MARK;
43118316Swollman	return (longest_len);
43218316Swollman}
43318316Swollman
43418316Swollmanvoid
43518316Swollmanusage()
43618316Swollman{
43718316Swollman	(void)fprintf(stderr, "usage: tsort [-dlq] [file]\n");
43818316Swollman	exit(1);
43918316Swollman}
44018316Swollman