tsort.c revision 87301
118316Swollman/* 218316Swollman * Copyright (c) 1989, 1993, 1994 318316Swollman * The Regents of the University of California. All rights reserved. 418316Swollman * 518316Swollman * This code is derived from software contributed to Berkeley by 618316Swollman * Michael Rendell of Memorial University of Newfoundland. 718316Swollman * 818316Swollman * Redistribution and use in source and binary forms, with or without 918316Swollman * modification, are permitted provided that the following conditions 1018316Swollman * are met: 1118316Swollman * 1. Redistributions of source code must retain the above copyright 1218316Swollman * notice, this list of conditions and the following disclaimer. 1318316Swollman * 2. Redistributions in binary form must reproduce the above copyright 1446303Smarkm * notice, this list of conditions and the following disclaimer in the 1518316Swollman * documentation and/or other materials provided with the distribution. 1618316Swollman * 3. All advertising materials mentioning features or use of this software 1718316Swollman * must display the following acknowledgement: 1818316Swollman * This product includes software developed by the University of 1918316Swollman * California, Berkeley and its contributors. 2018316Swollman * 4. Neither the name of the University nor the names of its contributors 2118316Swollman * may be used to endorse or promote products derived from this software 2218316Swollman * without specific prior written permission. 2318316Swollman * 2418316Swollman * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 2518316Swollman * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 2618316Swollman * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2718316Swollman * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 2818316Swollman * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2918316Swollman * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 3018316Swollman * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 3118316Swollman * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 3246303Smarkm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 3350476Speter * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3418316Swollman * SUCH DAMAGE. 3518316Swollman * 3618316Swollman * $FreeBSD: head/usr.bin/tsort/tsort.c 87301 2001-12-03 21:32:54Z dwmalone $ 3718316Swollman */ 3818316Swollman 3918316Swollman#ifndef lint 4046303Smarkmstatic const char copyright[] = 4118316Swollman"@(#) Copyright (c) 1989, 1993, 1994\n\ 4218316Swollman The Regents of the University of California. All rights reserved.\n"; 4346303Smarkm#endif /* not lint */ 4446303Smarkm 4546303Smarkm#ifndef lint 4646303Smarkmstatic const char sccsid[] = "@(#)tsort.c 8.3 (Berkeley) 5/4/95"; 4746303Smarkm#endif /* not lint */ 4850969Speter 4946303Smarkm#include <sys/types.h> 5046303Smarkm 5118316Swollman#include <ctype.h> 52102231Strhodes#include <db.h> 5318316Swollman#include <err.h> 5418316Swollman#include <errno.h> 5518316Swollman#include <fcntl.h> 5618316Swollman#include <stdio.h> 5718316Swollman#include <stdlib.h> 5820339Swollman#include <string.h> 5981604Speter#include <unistd.h> 6046303Smarkm 61118582Simp/* 62118582Simp * Topological sort. Input is a list of pairs of strings separated by 6320339Swollman * white space (spaces, tabs, and/or newlines); strings are written to 6418316Swollman * standard output in sorted order, one per line. 6518316Swollman * 6646303Smarkm * usage: 6718316Swollman * tsort [-dlq] [inputfile] 6818316Swollman * If no input file is specified, standard input is read. 6919880Swollman * 7019880Swollman * Should be compatible with AT&T tsort HOWEVER the output is not identical 7119880Swollman * (i.e. for most graphs there is more than one sorted order, and this tsort 7219880Swollman * usually generates a different one then the AT&T tsort). Also, cycle 7319880Swollman * reporting seems to be more accurate in this version (the AT&T tsort 7419880Swollman * sometimes says a node is in a cycle when it isn't). 7519880Swollman * 7619880Swollman * Michael Rendell, michael@stretch.cs.mun.ca - Feb 26, '90 7719880Swollman */ 7819880Swollman#define HASHSIZE 53 /* doesn't need to be big */ 7919880Swollman#define NF_MARK 0x1 /* marker for cycle detection */ 8019880Swollman#define NF_ACYCLIC 0x2 /* this node is cycle free */ 8119880Swollman#define NF_NODEST 0x4 /* Unreachable */ 8219880Swollman 8319880Swollman 8419880Swollmantypedef struct node_str NODE; 8519880Swollman 8619880Swollmanstruct node_str { 8719880Swollman NODE **n_prevp; /* pointer to previous node's n_next */ 8819880Swollman NODE *n_next; /* next node in graph */ 8919880Swollman NODE **n_arcs; /* array of arcs to other nodes */ 9019880Swollman int n_narcs; /* number of arcs in n_arcs[] */ 9119880Swollman int n_arcsize; /* size of n_arcs[] array */ 9219880Swollman int n_refcnt; /* # of arcs pointing to this node */ 9319880Swollman int n_flags; /* NF_* */ 9419880Swollman char n_name[1]; /* name of this node */ 9519880Swollman}; 9619880Swollman 9719880Swollmantypedef struct _buf { 9819880Swollman char *b_buf; 9919880Swollman int b_bsize; 10019880Swollman} BUF; 10119880Swollman 10219880SwollmanDB *db; 10319880SwollmanNODE *graph, **cycle_buf, **longest_cycle; 10419880Swollmanint debug, longest, quiet; 10519880Swollman 10619880Swollmanvoid add_arc __P((char *, char *)); 10719880Swollmanint find_cycle __P((NODE *, NODE *, int, int)); 10819880SwollmanNODE *get_node __P((char *)); 10919880Swollmanvoid *grow_buf __P((void *, int)); 11019880Swollmanvoid remove_node __P((NODE *)); 11119880Swollmanvoid clear_cycle __P((void)); 11219880Swollmanvoid tsort __P((void)); 11319880Swollmanvoid usage __P((void)); 11419880Swollman 11519880Swollmanint 11619880Swollmanmain(argc, argv) 11719880Swollman int argc; 11819880Swollman char *argv[]; 11919880Swollman{ 12019880Swollman register BUF *b; 12119880Swollman register int c, n; 12218316Swollman FILE *fp; 12318316Swollman int bsize, ch, nused; 12418316Swollman BUF bufs[2]; 12518316Swollman 12618316Swollman while ((ch = getopt(argc, argv, "dlq")) != -1) 12718316Swollman switch (ch) { 12818316Swollman case 'd': 12918316Swollman debug = 1; 13018316Swollman break; 13118316Swollman case 'l': 13218316Swollman longest = 1; 13318316Swollman break; 13418316Swollman case 'q': 13518316Swollman quiet = 1; 13618316Swollman break; 13718316Swollman case '?': 13818316Swollman default: 13918316Swollman usage(); 14018316Swollman } 14118316Swollman argc -= optind; 14218316Swollman argv += optind; 14346303Smarkm 14418316Swollman switch (argc) { 14518316Swollman case 0: 14618316Swollman fp = stdin; 14718316Swollman break; 14818316Swollman case 1: 14918316Swollman if ((fp = fopen(*argv, "r")) == NULL) 15018316Swollman err(1, "%s", *argv); 15118316Swollman break; 15218316Swollman default: 15318316Swollman usage(); 15418316Swollman } 15518316Swollman 15618316Swollman for (b = bufs, n = 2; --n >= 0; b++) 15718316Swollman b->b_buf = grow_buf(NULL, b->b_bsize = 1024); 15846303Smarkm 15918316Swollman /* parse input and build the graph */ 16018316Swollman for (n = 0, c = getc(fp);;) { 16118316Swollman while (c != EOF && isspace(c)) 16218316Swollman c = getc(fp); 16318316Swollman if (c == EOF) 16418316Swollman break; 16518316Swollman 16618316Swollman nused = 0; 16718316Swollman b = &bufs[n]; 16818316Swollman bsize = b->b_bsize; 16920339Swollman do { 17020339Swollman b->b_buf[nused++] = c; 17118316Swollman if (nused == bsize) 17218316Swollman b->b_buf = grow_buf(b->b_buf, bsize *= 2); 17318316Swollman c = getc(fp); 17420339Swollman } while (c != EOF && !isspace(c)); 17520339Swollman 17618316Swollman b->b_buf[nused] = '\0'; 17720339Swollman b->b_bsize = bsize; 17818316Swollman if (n) 17918316Swollman add_arc(bufs[0].b_buf, bufs[1].b_buf); 18018316Swollman n = !n; 18118316Swollman } 18218316Swollman (void)fclose(fp); 18346303Smarkm if (n) 18418316Swollman errx(1, "odd data count"); 18518316Swollman 18618316Swollman /* do the sort */ 18718316Swollman tsort(); 18818316Swollman exit(0); 18918316Swollman} 19018316Swollman 19146303Smarkm/* double the size of oldbuf and return a pointer to the new buffer. */ 19218316Swollmanvoid * 19318316Swollmangrow_buf(bp, size) 19418316Swollman void *bp; 19518316Swollman int size; 19618316Swollman{ 19746303Smarkm if ((bp = realloc(bp, (u_int)size)) == NULL) 19846303Smarkm err(1, NULL); 19918316Swollman return (bp); 20018316Swollman} 20118316Swollman 20218316Swollman/* 20318316Swollman * add an arc from node s1 to node s2 in the graph. If s1 or s2 are not in 20418316Swollman * the graph, then add them. 20518316Swollman */ 20646303Smarkmvoid 20720339Swollmanadd_arc(s1, s2) 20820339Swollman char *s1, *s2; 20920339Swollman{ 21018316Swollman register NODE *n1; 21146303Smarkm NODE *n2; 21246303Smarkm int bsize, i; 21346303Smarkm 21446303Smarkm n1 = get_node(s1); 21546303Smarkm 21646303Smarkm if (!strcmp(s1, s2)) 21718316Swollman return; 21818316Swollman 21920339Swollman n2 = get_node(s2); 22018316Swollman 22118316Swollman /* 22218316Swollman * Check if this arc is already here. 22318316Swollman */ 22418316Swollman for (i = 0; i < n1->n_narcs; i++) 22518316Swollman if (n1->n_arcs[i] == n2) 22618316Swollman return; 22718316Swollman /* 22818316Swollman * Add it. 22946303Smarkm */ 23018316Swollman if (n1->n_narcs == n1->n_arcsize) { 23118316Swollman if (!n1->n_arcsize) 23218316Swollman n1->n_arcsize = 10; 23318316Swollman bsize = n1->n_arcsize * sizeof(*n1->n_arcs) * 2; 23418316Swollman n1->n_arcs = grow_buf(n1->n_arcs, bsize); 23546303Smarkm n1->n_arcsize = bsize / sizeof(*n1->n_arcs); 23618316Swollman } 23718316Swollman n1->n_arcs[n1->n_narcs++] = n2; 23818316Swollman ++n2->n_refcnt; 23918316Swollman} 24018316Swollman 24118316Swollman/* Find a node in the graph (insert if not found) and return a pointer to it. */ 24218316SwollmanNODE * 24318316Swollmanget_node(name) 24446303Smarkm char *name; 24518316Swollman{ 24646303Smarkm DBT data, key; 24718316Swollman NODE *n; 24818316Swollman 24918316Swollman if (db == NULL && 25018316Swollman (db = dbopen(NULL, O_RDWR, 0, DB_HASH, NULL)) == NULL) 25118316Swollman err(1, "db: %s", name); 25220339Swollman 25320339Swollman key.data = name; 25418316Swollman key.size = strlen(name) + 1; 25546303Smarkm 25620339Swollman switch ((*db->get)(db, &key, &data, 0)) { 25718316Swollman case 0: 25846303Smarkm bcopy(data.data, &n, sizeof(n)); 25920339Swollman return (n); 26020339Swollman case 1: 26120339Swollman break; 26220339Swollman default: 26320339Swollman case -1: 26446303Smarkm err(1, "db: %s", name); 26520339Swollman } 26620339Swollman 26720339Swollman if ((n = malloc(sizeof(NODE) + key.size)) == NULL) 26820339Swollman err(1, NULL); 26920339Swollman 27020339Swollman n->n_narcs = 0; 27120339Swollman n->n_arcsize = 0; 27220339Swollman n->n_arcs = NULL; 27320339Swollman n->n_refcnt = 0; 27420339Swollman n->n_flags = 0; 27520339Swollman bcopy(name, n->n_name, key.size); 27620339Swollman 27720339Swollman /* Add to linked list. */ 27820339Swollman if ((n->n_next = graph) != NULL) 27920339Swollman graph->n_prevp = &n->n_next; 28020339Swollman n->n_prevp = &graph; 28120339Swollman graph = n; 28220339Swollman 28320339Swollman /* Add to hash table. */ 28420339Swollman data.data = &n; 28520339Swollman data.size = sizeof(n); 28620339Swollman if ((*db->put)(db, &key, &data, 0)) 28720339Swollman err(1, "db: %s", name); 28820339Swollman return (n); 28920339Swollman} 29020339Swollman 29120339Swollman 29220339Swollman/* 29320339Swollman * Clear the NODEST flag from all nodes. 29420339Swollman */ 29520339Swollmanvoid 29620339Swollmanclear_cycle() 29720339Swollman{ 29820339Swollman NODE *n; 29946303Smarkm 30046303Smarkm for (n = graph; n != NULL; n = n->n_next) 30120339Swollman n->n_flags &= ~NF_NODEST; 30220339Swollman} 30318316Swollman 30418316Swollman/* do topological sort on graph */ 30546303Smarkmvoid 30618316Swollmantsort() 30718316Swollman{ 30820339Swollman register NODE *n, *next; 30920339Swollman register int cnt, i; 31020339Swollman 31118316Swollman while (graph != NULL) { 31220339Swollman /* 31320339Swollman * Keep getting rid of simple cases until there are none left, 31420339Swollman * if there are any nodes still in the graph, then there is 31520339Swollman * a cycle in it. 31620339Swollman */ 31720339Swollman do { 31820339Swollman for (cnt = 0, n = graph; n != NULL; n = next) { 31920339Swollman next = n->n_next; 32020339Swollman if (n->n_refcnt == 0) { 32120339Swollman remove_node(n); 32218316Swollman ++cnt; 32318316Swollman } 32418316Swollman } 32518316Swollman } while (graph != NULL && cnt); 32618316Swollman 32718316Swollman if (graph == NULL) 32818316Swollman break; 32920339Swollman 33020339Swollman if (!cycle_buf) { 33120339Swollman /* 33220339Swollman * Allocate space for two cycle logs - one to be used 33320339Swollman * as scratch space, the other to save the longest 33420339Swollman * cycle. 33520339Swollman */ 33620339Swollman for (cnt = 0, n = graph; n != NULL; n = n->n_next) 33720339Swollman ++cnt; 33820339Swollman cycle_buf = malloc((u_int)sizeof(NODE *) * cnt); 33920339Swollman longest_cycle = malloc((u_int)sizeof(NODE *) * cnt); 34020339Swollman if (cycle_buf == NULL || longest_cycle == NULL) 34120339Swollman err(1, NULL); 34220339Swollman } 34320339Swollman for (n = graph; n != NULL; n = n->n_next) 34420339Swollman if (!(n->n_flags & NF_ACYCLIC)) { 34520339Swollman if ((cnt = find_cycle(n, n, 0, 0))) { 34620339Swollman if (!quiet) { 34746303Smarkm warnx("cycle in data"); 34818316Swollman for (i = 0; i < cnt; i++) 34918316Swollman warnx("%s", 35018316Swollman longest_cycle[i]->n_name); 35118316Swollman } 35218316Swollman remove_node(n); 35320339Swollman clear_cycle(); 35420339Swollman break; 35520339Swollman } else { 35620339Swollman /* to avoid further checks */ 35720339Swollman n->n_flags |= NF_ACYCLIC; 35820339Swollman clear_cycle(); 35920339Swollman } 36020339Swollman } 36120339Swollman 36220339Swollman if (n == NULL) 36318316Swollman errx(1, "internal error -- could not find cycle"); 36418316Swollman } 36518316Swollman} 36646303Smarkm 36718316Swollman/* print node and remove from graph (does not actually free node) */ 36846303Smarkmvoid 36918316Swollmanremove_node(n) 37020339Swollman register NODE *n; 37120339Swollman{ 37220339Swollman register NODE **np; 37318316Swollman register int i; 37420339Swollman 37520339Swollman (void)printf("%s\n", n->n_name); 37620339Swollman for (np = n->n_arcs, i = n->n_narcs; --i >= 0; np++) 37720339Swollman --(*np)->n_refcnt; 37820339Swollman n->n_narcs = 0; 37919880Swollman *n->n_prevp = n->n_next; 38020339Swollman if (n->n_next) 38120339Swollman n->n_next->n_prevp = n->n_prevp; 38220339Swollman} 38318316Swollman 38418316Swollman 38518316Swollman/* look for the longest? cycle from node from to node to. */ 38618316Swollmanint 38718316Swollmanfind_cycle(from, to, longest_len, depth) 38846303Smarkm NODE *from, *to; 38918316Swollman int depth, longest_len; 39018316Swollman{ 39146303Smarkm register NODE **np; 39218316Swollman register int i, len; 39318316Swollman 39418316Swollman /* 39518316Swollman * avoid infinite loops and ignore portions of the graph known 39618316Swollman * to be acyclic 39746303Smarkm */ 39818316Swollman if (from->n_flags & (NF_NODEST|NF_MARK|NF_ACYCLIC)) 39918316Swollman return (0); 40046303Smarkm from->n_flags |= NF_MARK; 40118316Swollman 40218316Swollman for (np = from->n_arcs, i = from->n_narcs; --i >= 0; np++) { 40318316Swollman cycle_buf[depth] = *np; 40420339Swollman if (*np == to) { 40518316Swollman if (depth + 1 > longest_len) { 40618316Swollman longest_len = depth + 1; 40720339Swollman (void)memcpy((char *)longest_cycle, 40818316Swollman (char *)cycle_buf, 40920339Swollman longest_len * sizeof(NODE *)); 41020339Swollman } 41118316Swollman } else { 41220339Swollman if ((*np)->n_flags & (NF_MARK|NF_ACYCLIC|NF_NODEST)) 41320339Swollman continue; 41420339Swollman len = find_cycle(*np, to, longest_len, depth + 1); 41520339Swollman 41620339Swollman if (debug) 41720339Swollman (void)printf("%*s %s->%s %d\n", depth, "", 41820339Swollman from->n_name, to->n_name, len); 41920339Swollman 42020339Swollman if (len == 0) 42120339Swollman (*np)->n_flags |= NF_NODEST; 42218316Swollman 42318316Swollman if (len > longest_len) 42420339Swollman longest_len = len; 42520339Swollman 42618316Swollman if (len > 0 && !longest) 42718316Swollman break; 42818316Swollman } 42918316Swollman } 43018316Swollman from->n_flags &= ~NF_MARK; 43118316Swollman return (longest_len); 43218316Swollman} 43318316Swollman 43418316Swollmanvoid 43518316Swollmanusage() 43618316Swollman{ 43718316Swollman (void)fprintf(stderr, "usage: tsort [-dlq] [file]\n"); 43818316Swollman exit(1); 43918316Swollman} 44018316Swollman