11590Srgrimes/*
21590Srgrimes * Copyright (c) 1989, 1993, 1994
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * This code is derived from software contributed to Berkeley by
61590Srgrimes * Michael Rendell of Memorial University of Newfoundland.
71590Srgrimes *
81590Srgrimes * Redistribution and use in source and binary forms, with or without
91590Srgrimes * modification, are permitted provided that the following conditions
101590Srgrimes * are met:
111590Srgrimes * 1. Redistributions of source code must retain the above copyright
121590Srgrimes *    notice, this list of conditions and the following disclaimer.
131590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
141590Srgrimes *    notice, this list of conditions and the following disclaimer in the
151590Srgrimes *    documentation and/or other materials provided with the distribution.
161590Srgrimes * 4. Neither the name of the University nor the names of its contributors
171590Srgrimes *    may be used to endorse or promote products derived from this software
181590Srgrimes *    without specific prior written permission.
191590Srgrimes *
201590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
211590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
221590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
231590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
241590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
251590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
261590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
271590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
281590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
291590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
301590Srgrimes * SUCH DAMAGE.
311590Srgrimes */
321590Srgrimes
331590Srgrimes#ifndef lint
3487301Sdwmalonestatic const char copyright[] =
351590Srgrimes"@(#) Copyright (c) 1989, 1993, 1994\n\
361590Srgrimes	The Regents of the University of California.  All rights reserved.\n";
371590Srgrimes#endif /* not lint */
381590Srgrimes
391590Srgrimes#ifndef lint
4087301Sdwmalonestatic const char sccsid[] = "@(#)tsort.c	8.3 (Berkeley) 5/4/95";
411590Srgrimes#endif /* not lint */
421590Srgrimes
43146751Scharnier#include <sys/cdefs.h>
44146751Scharnier__FBSDID("$FreeBSD$");
45146751Scharnier
461590Srgrimes#include <sys/types.h>
471590Srgrimes
481590Srgrimes#include <ctype.h>
491590Srgrimes#include <db.h>
501590Srgrimes#include <err.h>
51200462Sdelphij#include <errno.h>
521590Srgrimes#include <fcntl.h>
531590Srgrimes#include <stdio.h>
541590Srgrimes#include <stdlib.h>
551590Srgrimes#include <string.h>
5623710Speter#include <unistd.h>
571590Srgrimes
581590Srgrimes/*
591590Srgrimes *  Topological sort.  Input is a list of pairs of strings separated by
601590Srgrimes *  white space (spaces, tabs, and/or newlines); strings are written to
611590Srgrimes *  standard output in sorted order, one per line.
621590Srgrimes *
631590Srgrimes *  usage:
6417389Sjkh *     tsort [-dlq] [inputfile]
651590Srgrimes *  If no input file is specified, standard input is read.
661590Srgrimes *
6772093Sasmodai *  Should be compatible with AT&T tsort HOWEVER the output is not identical
681590Srgrimes *  (i.e. for most graphs there is more than one sorted order, and this tsort
691590Srgrimes *  usually generates a different one then the AT&T tsort).  Also, cycle
701590Srgrimes *  reporting seems to be more accurate in this version (the AT&T tsort
711590Srgrimes *  sometimes says a node is in a cycle when it isn't).
721590Srgrimes *
731590Srgrimes *  Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90
741590Srgrimes */
75119747Sdds
761590Srgrimes#define	NF_MARK		0x1		/* marker for cycle detection */
771590Srgrimes#define	NF_ACYCLIC	0x2		/* this node is cycle free */
781590Srgrimes#define	NF_NODEST	0x4		/* Unreachable */
791590Srgrimes
801590Srgrimes
811590Srgrimestypedef struct node_str NODE;
821590Srgrimes
831590Srgrimesstruct node_str {
841590Srgrimes	NODE **n_prevp;			/* pointer to previous node's n_next */
851590Srgrimes	NODE *n_next;			/* next node in graph */
861590Srgrimes	NODE **n_arcs;			/* array of arcs to other nodes */
871590Srgrimes	int n_narcs;			/* number of arcs in n_arcs[] */
881590Srgrimes	int n_arcsize;			/* size of n_arcs[] array */
891590Srgrimes	int n_refcnt;			/* # of arcs pointing to this node */
901590Srgrimes	int n_flags;			/* NF_* */
911590Srgrimes	char n_name[1];			/* name of this node */
921590Srgrimes};
931590Srgrimes
941590Srgrimestypedef struct _buf {
951590Srgrimes	char *b_buf;
961590Srgrimes	int b_bsize;
971590Srgrimes} BUF;
981590Srgrimes
991590SrgrimesDB *db;
1001590SrgrimesNODE *graph, **cycle_buf, **longest_cycle;
10117389Sjkhint debug, longest, quiet;
1021590Srgrimes
10392922Simpvoid	 add_arc(char *, char *);
10492922Simpint	 find_cycle(NODE *, NODE *, int, int);
10592922SimpNODE	*get_node(char *);
10696785Sjmallettvoid	*grow_buf(void *, size_t);
10792922Simpvoid	 remove_node(NODE *);
10892922Simpvoid	 clear_cycle(void);
10992922Simpvoid	 tsort(void);
11092922Simpvoid	 usage(void);
1111590Srgrimes
1121590Srgrimesint
113102944Sdwmalonemain(int argc, char *argv[])
1141590Srgrimes{
115102944Sdwmalone	BUF *b;
116102944Sdwmalone	int c, n;
1171590Srgrimes	FILE *fp;
1181590Srgrimes	int bsize, ch, nused;
1191590Srgrimes	BUF bufs[2];
1201590Srgrimes
121146751Scharnier	fp = NULL;
12224360Simp	while ((ch = getopt(argc, argv, "dlq")) != -1)
1231590Srgrimes		switch (ch) {
1241590Srgrimes		case 'd':
1251590Srgrimes			debug = 1;
1261590Srgrimes			break;
1271590Srgrimes		case 'l':
1281590Srgrimes			longest = 1;
1291590Srgrimes			break;
13017389Sjkh		case 'q':
13117389Sjkh			quiet = 1;
13217389Sjkh			break;
1331590Srgrimes		case '?':
1341590Srgrimes		default:
1351590Srgrimes			usage();
1361590Srgrimes		}
1371590Srgrimes	argc -= optind;
1381590Srgrimes	argv += optind;
1391590Srgrimes
1401590Srgrimes	switch (argc) {
1411590Srgrimes	case 0:
1421590Srgrimes		fp = stdin;
1431590Srgrimes		break;
1441590Srgrimes	case 1:
1451590Srgrimes		if ((fp = fopen(*argv, "r")) == NULL)
1461590Srgrimes			err(1, "%s", *argv);
1471590Srgrimes		break;
1481590Srgrimes	default:
1491590Srgrimes		usage();
1501590Srgrimes	}
1511590Srgrimes
1521590Srgrimes	for (b = bufs, n = 2; --n >= 0; b++)
1531590Srgrimes		b->b_buf = grow_buf(NULL, b->b_bsize = 1024);
1541590Srgrimes
1551590Srgrimes	/* parse input and build the graph */
1561590Srgrimes	for (n = 0, c = getc(fp);;) {
1571590Srgrimes		while (c != EOF && isspace(c))
1581590Srgrimes			c = getc(fp);
1591590Srgrimes		if (c == EOF)
1601590Srgrimes			break;
1611590Srgrimes
1621590Srgrimes		nused = 0;
1631590Srgrimes		b = &bufs[n];
1641590Srgrimes		bsize = b->b_bsize;
1651590Srgrimes		do {
1661590Srgrimes			b->b_buf[nused++] = c;
1671590Srgrimes			if (nused == bsize)
1681590Srgrimes				b->b_buf = grow_buf(b->b_buf, bsize *= 2);
1691590Srgrimes			c = getc(fp);
1701590Srgrimes		} while (c != EOF && !isspace(c));
1711590Srgrimes
1721590Srgrimes		b->b_buf[nused] = '\0';
1731590Srgrimes		b->b_bsize = bsize;
1741590Srgrimes		if (n)
1751590Srgrimes			add_arc(bufs[0].b_buf, bufs[1].b_buf);
1761590Srgrimes		n = !n;
1771590Srgrimes	}
1781590Srgrimes	(void)fclose(fp);
1791590Srgrimes	if (n)
1801590Srgrimes		errx(1, "odd data count");
1811590Srgrimes
1821590Srgrimes	/* do the sort */
1831590Srgrimes	tsort();
1841590Srgrimes	exit(0);
1851590Srgrimes}
1861590Srgrimes
1871590Srgrimes/* double the size of oldbuf and return a pointer to the new buffer. */
1881590Srgrimesvoid *
189102944Sdwmalonegrow_buf(void *bp, size_t size)
1901590Srgrimes{
19196785Sjmallett	if ((bp = realloc(bp, size)) == NULL)
1921590Srgrimes		err(1, NULL);
1931590Srgrimes	return (bp);
1941590Srgrimes}
1951590Srgrimes
1961590Srgrimes/*
1971590Srgrimes * add an arc from node s1 to node s2 in the graph.  If s1 or s2 are not in
1981590Srgrimes * the graph, then add them.
1991590Srgrimes */
2001590Srgrimesvoid
201102944Sdwmaloneadd_arc(char *s1, char *s2)
2021590Srgrimes{
203102944Sdwmalone	NODE *n1;
2041590Srgrimes	NODE *n2;
2051590Srgrimes	int bsize, i;
2061590Srgrimes
2071590Srgrimes	n1 = get_node(s1);
2081590Srgrimes
2091590Srgrimes	if (!strcmp(s1, s2))
2101590Srgrimes		return;
2111590Srgrimes
2121590Srgrimes	n2 = get_node(s2);
2131590Srgrimes
2141590Srgrimes	/*
2151590Srgrimes	 * Check if this arc is already here.
2161590Srgrimes	 */
2171590Srgrimes	for (i = 0; i < n1->n_narcs; i++)
2181590Srgrimes		if (n1->n_arcs[i] == n2)
2191590Srgrimes			return;
2201590Srgrimes	/*
2211590Srgrimes	 * Add it.
2221590Srgrimes	 */
2231590Srgrimes	if (n1->n_narcs == n1->n_arcsize) {
2241590Srgrimes		if (!n1->n_arcsize)
2251590Srgrimes			n1->n_arcsize = 10;
2261590Srgrimes		bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2;
2271590Srgrimes		n1->n_arcs = grow_buf(n1->n_arcs, bsize);
2281590Srgrimes		n1->n_arcsize = bsize / sizeof(*n1->n_arcs);
2291590Srgrimes	}
2301590Srgrimes	n1->n_arcs[n1->n_narcs++] = n2;
2311590Srgrimes	++n2->n_refcnt;
2321590Srgrimes}
2331590Srgrimes
2341590Srgrimes/* Find a node in the graph (insert if not found) and return a pointer to it. */
2351590SrgrimesNODE *
236102944Sdwmaloneget_node(char *name)
2371590Srgrimes{
2381590Srgrimes	DBT data, key;
2391590Srgrimes	NODE *n;
2401590Srgrimes
2411590Srgrimes	if (db == NULL &&
2421590Srgrimes	    (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL)
2431590Srgrimes		err(1, "db: %s", name);
2441590Srgrimes
2451590Srgrimes	key.data = name;
2461590Srgrimes	key.size = strlen(name) + 1;
2471590Srgrimes
2481590Srgrimes	switch ((*db->get)(db, &key, &data, 0)) {
2491590Srgrimes	case 0:
2501590Srgrimes		bcopy(data.data, &n, sizeof(n));
2511590Srgrimes		return (n);
2521590Srgrimes	case 1:
2531590Srgrimes		break;
2541590Srgrimes	default:
2551590Srgrimes	case -1:
2561590Srgrimes		err(1, "db: %s", name);
2571590Srgrimes	}
2581590Srgrimes
2591590Srgrimes	if ((n = malloc(sizeof(NODE) + key.size)) == NULL)
2601590Srgrimes		err(1, NULL);
2611590Srgrimes
2621590Srgrimes	n->n_narcs = 0;
2631590Srgrimes	n->n_arcsize = 0;
2641590Srgrimes	n->n_arcs = NULL;
2651590Srgrimes	n->n_refcnt = 0;
2661590Srgrimes	n->n_flags = 0;
2671590Srgrimes	bcopy(name, n->n_name, key.size);
2681590Srgrimes
2691590Srgrimes	/* Add to linked list. */
2701590Srgrimes	if ((n->n_next = graph) != NULL)
2711590Srgrimes		graph->n_prevp = &n->n_next;
2721590Srgrimes	n->n_prevp = &graph;
2731590Srgrimes	graph = n;
2741590Srgrimes
2751590Srgrimes	/* Add to hash table. */
2761590Srgrimes	data.data = &n;
2771590Srgrimes	data.size = sizeof(n);
2781590Srgrimes	if ((*db->put)(db, &key, &data, 0))
2791590Srgrimes		err(1, "db: %s", name);
2801590Srgrimes	return (n);
2811590Srgrimes}
2821590Srgrimes
2831590Srgrimes
2841590Srgrimes/*
2851590Srgrimes * Clear the NODEST flag from all nodes.
2861590Srgrimes */
2871590Srgrimesvoid
288102944Sdwmaloneclear_cycle(void)
2891590Srgrimes{
2901590Srgrimes	NODE *n;
2911590Srgrimes
2921590Srgrimes	for (n = graph; n != NULL; n = n->n_next)
2931590Srgrimes		n->n_flags &= ~NF_NODEST;
2941590Srgrimes}
2951590Srgrimes
2961590Srgrimes/* do topological sort on graph */
2971590Srgrimesvoid
298102944Sdwmalonetsort(void)
2991590Srgrimes{
300102944Sdwmalone	NODE *n, *next;
301102944Sdwmalone	int cnt, i;
3021590Srgrimes
3031590Srgrimes	while (graph != NULL) {
3041590Srgrimes		/*
3051590Srgrimes		 * Keep getting rid of simple cases until there are none left,
3061590Srgrimes		 * if there are any nodes still in the graph, then there is
3071590Srgrimes		 * a cycle in it.
3081590Srgrimes		 */
3091590Srgrimes		do {
3101590Srgrimes			for (cnt = 0, n = graph; n != NULL; n = next) {
3111590Srgrimes				next = n->n_next;
3121590Srgrimes				if (n->n_refcnt == 0) {
3131590Srgrimes					remove_node(n);
3141590Srgrimes					++cnt;
3151590Srgrimes				}
3161590Srgrimes			}
3171590Srgrimes		} while (graph != NULL && cnt);
3181590Srgrimes
3191590Srgrimes		if (graph == NULL)
3201590Srgrimes			break;
3211590Srgrimes
3221590Srgrimes		if (!cycle_buf) {
3231590Srgrimes			/*
3241590Srgrimes			 * Allocate space for two cycle logs - one to be used
3251590Srgrimes			 * as scratch space, the other to save the longest
3261590Srgrimes			 * cycle.
3271590Srgrimes			 */
3281590Srgrimes			for (cnt = 0, n = graph; n != NULL; n = n->n_next)
3291590Srgrimes				++cnt;
33096785Sjmallett			cycle_buf = malloc(sizeof(NODE *) * cnt);
33196785Sjmallett			longest_cycle = malloc(sizeof(NODE *) * cnt);
3321590Srgrimes			if (cycle_buf == NULL || longest_cycle == NULL)
3331590Srgrimes				err(1, NULL);
3341590Srgrimes		}
3351590Srgrimes		for (n = graph; n != NULL; n = n->n_next)
33648566Sbillf			if (!(n->n_flags & NF_ACYCLIC)) {
33717389Sjkh				if ((cnt = find_cycle(n, n, 0, 0))) {
33817389Sjkh					if (!quiet) {
33917389Sjkh						warnx("cycle in data");
34017389Sjkh						for (i = 0; i < cnt; i++)
34117389Sjkh							warnx("%s",
34217389Sjkh							    longest_cycle[i]->n_name);
34317389Sjkh					}
3441590Srgrimes					remove_node(n);
3451590Srgrimes					clear_cycle();
3461590Srgrimes					break;
3471590Srgrimes				} else {
3481590Srgrimes					/* to avoid further checks */
3491590Srgrimes					n->n_flags  |= NF_ACYCLIC;
3501590Srgrimes					clear_cycle();
3511590Srgrimes				}
35248566Sbillf			}
3531590Srgrimes
3541590Srgrimes		if (n == NULL)
3551590Srgrimes			errx(1, "internal error -- could not find cycle");
3561590Srgrimes	}
3571590Srgrimes}
3581590Srgrimes
3591590Srgrimes/* print node and remove from graph (does not actually free node) */
3601590Srgrimesvoid
361102944Sdwmaloneremove_node(NODE *n)
3621590Srgrimes{
363102944Sdwmalone	NODE **np;
364102944Sdwmalone	int i;
3651590Srgrimes
3661590Srgrimes	(void)printf("%s\n", n->n_name);
3671590Srgrimes	for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++)
3681590Srgrimes		--(*np)->n_refcnt;
3691590Srgrimes	n->n_narcs = 0;
3701590Srgrimes	*n->n_prevp = n->n_next;
3711590Srgrimes	if (n->n_next)
3721590Srgrimes		n->n_next->n_prevp = n->n_prevp;
3731590Srgrimes}
3741590Srgrimes
3751590Srgrimes
3761590Srgrimes/* look for the longest? cycle from node from to node to. */
3771590Srgrimesint
378102944Sdwmalonefind_cycle(NODE *from, NODE *to, int longest_len, int depth)
3791590Srgrimes{
380102944Sdwmalone	NODE **np;
381102944Sdwmalone	int i, len;
3821590Srgrimes
3831590Srgrimes	/*
3841590Srgrimes	 * avoid infinite loops and ignore portions of the graph known
3851590Srgrimes	 * to be acyclic
3861590Srgrimes	 */
3871590Srgrimes	if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC))
3881590Srgrimes		return (0);
3891590Srgrimes	from->n_flags |= NF_MARK;
3901590Srgrimes
3911590Srgrimes	for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) {
3921590Srgrimes		cycle_buf[depth] = *np;
3931590Srgrimes		if (*np == to) {
3941590Srgrimes			if (depth + 1 > longest_len) {
3951590Srgrimes				longest_len = depth + 1;
3961590Srgrimes				(void)memcpy((char *)longest_cycle,
3971590Srgrimes				    (char *)cycle_buf,
3981590Srgrimes				    longest_len * sizeof(NODE *));
3991590Srgrimes			}
4001590Srgrimes		} else {
4011590Srgrimes			if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST))
4021590Srgrimes				continue;
4031590Srgrimes			len = find_cycle(*np, to, longest_len, depth + 1);
4041590Srgrimes
4051590Srgrimes			if (debug)
4061590Srgrimes				(void)printf("%*s %s->%s %d\n", depth, "",
4071590Srgrimes				    from->n_name, to->n_name, len);
4081590Srgrimes
4091590Srgrimes			if (len == 0)
4101590Srgrimes				(*np)->n_flags |= NF_NODEST;
4111590Srgrimes
4121590Srgrimes			if (len > longest_len)
4131590Srgrimes				longest_len = len;
4141590Srgrimes
4151590Srgrimes			if (len > 0 && !longest)
4161590Srgrimes				break;
4171590Srgrimes		}
4181590Srgrimes	}
4191590Srgrimes	from->n_flags &= ~NF_MARK;
4201590Srgrimes	return (longest_len);
4211590Srgrimes}
4221590Srgrimes
4231590Srgrimesvoid
424102944Sdwmaloneusage(void)
4251590Srgrimes{
42617389Sjkh	(void)fprintf(stderr, "usage: tsort [-dlq] [file]\n");
4271590Srgrimes	exit(1);
4281590Srgrimes}
429