1/* tsort - topological sort.
2   Copyright (C) 1998-2005, 2007-2010 Free Software Foundation, Inc.
3
4   This program is free software: you can redistribute it and/or modify
5   it under the terms of the GNU General Public License as published by
6   the Free Software Foundation, either version 3 of the License, or
7   (at your option) any later version.
8
9   This program is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12   GNU General Public License for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16
17/* Written by Mark Kettenis <kettenis@phys.uva.nl>.  */
18
19/* The topological sort is done according to Algorithm T (Topological
20   sort) in Donald E. Knuth, The Art of Computer Programming, Volume
21   1/Fundamental Algorithms, page 262.  */
22
23#include <config.h>
24
25#include <assert.h>
26#include <getopt.h>
27#include <sys/types.h>
28
29#include "system.h"
30#include "long-options.h"
31#include "error.h"
32#include "quote.h"
33#include "readtokens.h"
34#include "stdio--.h"
35
36/* The official name of this program (e.g., no `g' prefix).  */
37#define PROGRAM_NAME "tsort"
38
39#define AUTHORS proper_name ("Mark Kettenis")
40
41/* Token delimiters when reading from a file.  */
42#define DELIM " \t\n"
43
44/* Members of the list of successors.  */
45struct successor
46{
47  struct item *suc;
48  struct successor *next;
49};
50
51/* Each string is held in core as the head of a list of successors.  */
52struct item
53{
54  const char *str;
55  struct item *left, *right;
56  int balance; /* -1, 0, or +1 */
57  size_t count;
58  struct item *qlink;
59  struct successor *top;
60};
61
62/* The head of the sorted list.  */
63static struct item *head = NULL;
64
65/* The tail of the list of `zeros', strings that have no predecessors.  */
66static struct item *zeros = NULL;
67
68/* Used for loop detection.  */
69static struct item *loop = NULL;
70
71/* The number of strings to sort.  */
72static size_t n_strings = 0;
73
74void
75usage (int status)
76{
77  if (status != EXIT_SUCCESS)
78    fprintf (stderr, _("Try `%s --help' for more information.\n"),
79             program_name);
80  else
81    {
82      printf (_("\
83Usage: %s [OPTION] [FILE]\n\
84Write totally ordered list consistent with the partial ordering in FILE.\n\
85With no FILE, or when FILE is -, read standard input.\n\
86\n\
87"), program_name);
88      fputs (HELP_OPTION_DESCRIPTION, stdout);
89      fputs (VERSION_OPTION_DESCRIPTION, stdout);
90      emit_ancillary_info ();
91    }
92
93  exit (status);
94}
95
96/* Create a new item/node for STR.  */
97static struct item *
98new_item (const char *str)
99{
100  struct item *k = xmalloc (sizeof *k);
101
102  k->str = (str ? xstrdup (str): NULL);
103  k->left = k->right = NULL;
104  k->balance = 0;
105
106  /* T1. Initialize (COUNT[k] <- 0 and TOP[k] <- ^).  */
107  k->count = 0;
108  k->qlink = NULL;
109  k->top = NULL;
110
111  return k;
112}
113
114/* Search binary tree rooted at *ROOT for STR.  Allocate a new tree if
115   *ROOT is NULL.  Insert a node/item for STR if not found.  Return
116   the node/item found/created for STR.
117
118   This is done according to Algorithm A (Balanced tree search and
119   insertion) in Donald E. Knuth, The Art of Computer Programming,
120   Volume 3/Searching and Sorting, pages 455--457.  */
121
122static struct item *
123search_item (struct item *root, const char *str)
124{
125  struct item *p, *q, *r, *s, *t;
126  int a;
127
128  assert (root);
129
130  /* Make sure the tree is not empty, since that is what the algorithm
131     below expects.  */
132  if (root->right == NULL)
133    return (root->right = new_item (str));
134
135  /* A1. Initialize.  */
136  t = root;
137  s = p = root->right;
138
139  for (;;)
140    {
141      /* A2. Compare.  */
142      a = strcmp (str, p->str);
143      if (a == 0)
144        return p;
145
146      /* A3 & A4.  Move left & right.  */
147      if (a < 0)
148        q = p->left;
149      else
150        q = p->right;
151
152      if (q == NULL)
153        {
154          /* A5. Insert.  */
155          q = new_item (str);
156
157          /* A3 & A4.  (continued).  */
158          if (a < 0)
159            p->left = q;
160          else
161            p->right = q;
162
163          /* A6. Adjust balance factors.  */
164          assert (!STREQ (str, s->str));
165          if (strcmp (str, s->str) < 0)
166            {
167              r = p = s->left;
168              a = -1;
169            }
170          else
171            {
172              r = p = s->right;
173              a = 1;
174            }
175
176          while (p != q)
177            {
178              assert (!STREQ (str, p->str));
179              if (strcmp (str, p->str) < 0)
180                {
181                  p->balance = -1;
182                  p = p->left;
183                }
184              else
185                {
186                  p->balance = 1;
187                  p = p->right;
188                }
189            }
190
191          /* A7. Balancing act.  */
192          if (s->balance == 0 || s->balance == -a)
193            {
194              s->balance += a;
195              return q;
196            }
197
198          if (r->balance == a)
199            {
200              /* A8. Single Rotation.  */
201              p = r;
202              if (a < 0)
203                {
204                  s->left = r->right;
205                  r->right = s;
206                }
207              else
208                {
209                  s->right = r->left;
210                  r->left = s;
211                }
212              s->balance = r->balance = 0;
213            }
214          else
215            {
216              /* A9. Double rotation.  */
217              if (a < 0)
218                {
219                  p = r->right;
220                  r->right = p->left;
221                  p->left = r;
222                  s->left = p->right;
223                  p->right = s;
224                }
225              else
226                {
227                  p = r->left;
228                  r->left = p->right;
229                  p->right = r;
230                  s->right = p->left;
231                  p->left = s;
232                }
233
234              s->balance = 0;
235              r->balance = 0;
236              if (p->balance == a)
237                s->balance = -a;
238              else if (p->balance == -a)
239                r->balance = a;
240              p->balance = 0;
241            }
242
243          /* A10. Finishing touch.  */
244          if (s == t->right)
245            t->right = p;
246          else
247            t->left = p;
248
249          return q;
250        }
251
252      /* A3 & A4.  (continued).  */
253      if (q->balance)
254        {
255          t = p;
256          s = q;
257        }
258
259      p = q;
260    }
261
262  /* NOTREACHED */
263}
264
265/* Record the fact that J precedes K.  */
266
267static void
268record_relation (struct item *j, struct item *k)
269{
270  struct successor *p;
271
272  if (!STREQ (j->str, k->str))
273    {
274      k->count++;
275      p = xmalloc (sizeof *p);
276      p->suc = k;
277      p->next = j->top;
278      j->top = p;
279    }
280}
281
282static bool
283count_items (struct item *unused ATTRIBUTE_UNUSED)
284{
285  n_strings++;
286  return false;
287}
288
289static bool
290scan_zeros (struct item *k)
291{
292  /* Ignore strings that have already been printed.  */
293  if (k->count == 0 && k->str)
294    {
295      if (head == NULL)
296        head = k;
297      else
298        zeros->qlink = k;
299
300      zeros = k;
301    }
302
303  return false;
304}
305
306/* Try to detect the loop.  If we have detected that K is part of a
307   loop, print the loop on standard error, remove a relation to break
308   the loop, and return true.
309
310   The loop detection strategy is as follows: Realise that what we're
311   dealing with is essentially a directed graph.  If we find an item
312   that is part of a graph that contains a cycle we traverse the graph
313   in backwards direction.  In general there is no unique way to do
314   this, but that is no problem.  If we encounter an item that we have
315   encountered before, we know that we've found a cycle.  All we have
316   to do now is retrace our steps, printing out the items until we
317   encounter that item again.  (This is not necessarily the item that
318   we started from originally.)  Since the order in which the items
319   are stored in the tree is not related to the specified partial
320   ordering, we may need to walk the tree several times before the
321   loop has completely been constructed.  If the loop was found, the
322   global variable LOOP will be NULL.  */
323
324static bool
325detect_loop (struct item *k)
326{
327  if (k->count > 0)
328    {
329      /* K does not have to be part of a cycle.  It is however part of
330         a graph that contains a cycle.  */
331
332      if (loop == NULL)
333        /* Start traversing the graph at K.  */
334        loop = k;
335      else
336        {
337          struct successor **p = &k->top;
338
339          while (*p)
340            {
341              if ((*p)->suc == loop)
342                {
343                  if (k->qlink)
344                    {
345                      /* We have found a loop.  Retrace our steps.  */
346                      while (loop)
347                        {
348                          struct item *tmp = loop->qlink;
349
350                          fprintf (stderr, "%s: %s\n", program_name,
351                                   loop->str);
352
353                          /* Until we encounter K again.  */
354                          if (loop == k)
355                            {
356                              /* Remove relation.  */
357                              (*p)->suc->count--;
358                              *p = (*p)->next;
359                              break;
360                            }
361
362                          /* Tidy things up since we might have to
363                             detect another loop.  */
364                          loop->qlink = NULL;
365                          loop = tmp;
366                        }
367
368                      while (loop)
369                        {
370                          struct item *tmp = loop->qlink;
371
372                          loop->qlink = NULL;
373                          loop = tmp;
374                        }
375
376                      /* Since we have found the loop, stop walking
377                         the tree.  */
378                      return true;
379                    }
380                  else
381                    {
382                      k->qlink = loop;
383                      loop = k;
384                      break;
385                    }
386                }
387
388              p = &(*p)->next;
389            }
390        }
391    }
392
393  return false;
394}
395
396/* Recurse (sub)tree rooted at ROOT, calling ACTION for each node.
397   Stop when ACTION returns true.  */
398
399static bool
400recurse_tree (struct item *root, bool (*action) (struct item *))
401{
402  if (root->left == NULL && root->right == NULL)
403    return (*action) (root);
404  else
405    {
406      if (root->left != NULL)
407        if (recurse_tree (root->left, action))
408          return true;
409      if ((*action) (root))
410        return true;
411      if (root->right != NULL)
412        if (recurse_tree (root->right, action))
413          return true;
414    }
415
416  return false;
417}
418
419/* Walk the tree specified by the head ROOT, calling ACTION for
420   each node.  */
421
422static void
423walk_tree (struct item *root, bool (*action) (struct item *))
424{
425  if (root->right)
426    recurse_tree (root->right, action);
427}
428
429/* Do a topological sort on FILE.   Return true if successful.  */
430
431static bool
432tsort (const char *file)
433{
434  bool ok = true;
435  struct item *root;
436  struct item *j = NULL;
437  struct item *k = NULL;
438  token_buffer tokenbuffer;
439  bool is_stdin = STREQ (file, "-");
440
441  /* Intialize the head of the tree will hold the strings we're sorting.  */
442  root = new_item (NULL);
443
444  if (!is_stdin && ! freopen (file, "r", stdin))
445    error (EXIT_FAILURE, errno, "%s", file);
446
447  init_tokenbuffer (&tokenbuffer);
448
449  while (1)
450    {
451      /* T2. Next Relation.  */
452      size_t len = readtoken (stdin, DELIM, sizeof (DELIM) - 1, &tokenbuffer);
453      if (len == (size_t) -1)
454        break;
455
456      assert (len != 0);
457
458      k = search_item (root, tokenbuffer.buffer);
459      if (j)
460        {
461          /* T3. Record the relation.  */
462          record_relation (j, k);
463          k = NULL;
464        }
465
466      j = k;
467    }
468
469  if (k != NULL)
470    error (EXIT_FAILURE, 0, _("%s: input contains an odd number of tokens"),
471           file);
472
473  /* T1. Initialize (N <- n).  */
474  walk_tree (root, count_items);
475
476  while (n_strings > 0)
477    {
478      /* T4. Scan for zeros.  */
479      walk_tree (root, scan_zeros);
480
481      while (head)
482        {
483          struct successor *p = head->top;
484
485          /* T5. Output front of queue.  */
486          puts (head->str);
487          head->str = NULL;	/* Avoid printing the same string twice.  */
488          n_strings--;
489
490          /* T6. Erase relations.  */
491          while (p)
492            {
493              p->suc->count--;
494              if (p->suc->count == 0)
495                {
496                  zeros->qlink = p->suc;
497                  zeros = p->suc;
498                }
499
500              p = p->next;
501            }
502
503          /* T7. Remove from queue.  */
504          head = head->qlink;
505        }
506
507      /* T8.  End of process.  */
508      if (n_strings > 0)
509        {
510          /* The input contains a loop.  */
511          error (0, 0, _("%s: input contains a loop:"), file);
512          ok = false;
513
514          /* Print the loop and remove a relation to break it.  */
515          do
516            walk_tree (root, detect_loop);
517          while (loop);
518        }
519    }
520
521  if (fclose (stdin) != 0)
522    error (EXIT_FAILURE, errno, "%s",
523           is_stdin ? _("standard input") : quote (file));
524
525  return ok;
526}
527
528int
529main (int argc, char **argv)
530{
531  bool ok;
532
533  initialize_main (&argc, &argv);
534  set_program_name (argv[0]);
535  setlocale (LC_ALL, "");
536  bindtextdomain (PACKAGE, LOCALEDIR);
537  textdomain (PACKAGE);
538
539  atexit (close_stdout);
540
541  parse_long_options (argc, argv, PROGRAM_NAME, PACKAGE, Version,
542                      usage, AUTHORS, (char const *) NULL);
543  if (getopt_long (argc, argv, "", NULL, NULL) != -1)
544    usage (EXIT_FAILURE);
545
546  if (1 < argc - optind)
547    {
548      error (0, 0, _("extra operand %s"), quote (argv[optind + 1]));
549      usage (EXIT_FAILURE);
550    }
551
552  ok = tsort (optind == argc ? "-" : argv[optind]);
553
554  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
555}
556