1/*
2 * "$Id: mxml-index.c 426 2011-01-01 23:42:17Z mike $"
3 *
4 * Index support code for Mini-XML, a small XML-like file parsing library.
5 *
6 * Copyright 2003-2011 by Michael R Sweet.
7 *
8 * These coded instructions, statements, and computer programs are the
9 * property of Michael R Sweet and are protected by Federal copyright
10 * law.  Distribution and use rights are outlined in the file "COPYING"
11 * which should have been included with this file.  If this file is
12 * missing or damaged, see the license at:
13 *
14 *     http://www.minixml.org/
15 *
16 * Contents:
17 *
18 */
19
20/*
21 * Include necessary headers...
22 */
23
24#include "config.h"
25#include "mxml.h"
26
27
28/*
29 * Sort functions...
30 */
31
32static int	index_compare(mxml_index_t *ind, mxml_node_t *first,
33		              mxml_node_t *second);
34static int	index_find(mxml_index_t *ind, const char *element,
35		           const char *value, mxml_node_t *node);
36static void	index_sort(mxml_index_t *ind, int left, int right);
37
38
39/*
40 * 'mxmlIndexDelete()' - Delete an index.
41 */
42
43void
44mxmlIndexDelete(mxml_index_t *ind)	/* I - Index to delete */
45{
46 /*
47  * Range check input..
48  */
49
50  if (!ind)
51    return;
52
53 /*
54  * Free memory...
55  */
56
57  if (ind->attr)
58    free(ind->attr);
59
60  if (ind->alloc_nodes)
61    free(ind->nodes);
62
63  free(ind);
64}
65
66
67/*
68 * 'mxmlIndexEnum()' - Return the next node in the index.
69 *
70 * Nodes are returned in the sorted order of the index.
71 */
72
73mxml_node_t *				/* O - Next node or NULL if there is none */
74mxmlIndexEnum(mxml_index_t *ind)	/* I - Index to enumerate */
75{
76 /*
77  * Range check input...
78  */
79
80  if (!ind)
81    return (NULL);
82
83 /*
84  * Return the next node...
85  */
86
87  if (ind->cur_node < ind->num_nodes)
88    return (ind->nodes[ind->cur_node ++]);
89  else
90    return (NULL);
91}
92
93
94/*
95 * 'mxmlIndexFind()' - Find the next matching node.
96 *
97 * You should call mxmlIndexReset() prior to using this function for
98 * the first time with a particular set of "element" and "value"
99 * strings. Passing NULL for both "element" and "value" is equivalent
100 * to calling mxmlIndexEnum().
101 */
102
103mxml_node_t *				/* O - Node or NULL if none found */
104mxmlIndexFind(mxml_index_t *ind,	/* I - Index to search */
105              const char   *element,	/* I - Element name to find, if any */
106	      const char   *value)	/* I - Attribute value, if any */
107{
108  int		diff,			/* Difference between names */
109		current,		/* Current entity in search */
110		first,			/* First entity in search */
111		last;			/* Last entity in search */
112
113
114#ifdef DEBUG
115  printf("mxmlIndexFind(ind=%p, element=\"%s\", value=\"%s\")\n",
116         ind, element ? element : "(null)", value ? value : "(null)");
117#endif /* DEBUG */
118
119 /*
120  * Range check input...
121  */
122
123  if (!ind || (!ind->attr && value))
124  {
125#ifdef DEBUG
126    puts("    returning NULL...");
127    printf("    ind->attr=\"%s\"\n", ind->attr ? ind->attr : "(null)");
128#endif /* DEBUG */
129
130    return (NULL);
131  }
132
133 /*
134  * If both element and value are NULL, just enumerate the nodes in the
135  * index...
136  */
137
138  if (!element && !value)
139    return (mxmlIndexEnum(ind));
140
141 /*
142  * If there are no nodes in the index, return NULL...
143  */
144
145  if (!ind->num_nodes)
146  {
147#ifdef DEBUG
148    puts("    returning NULL...");
149    puts("    no nodes!");
150#endif /* DEBUG */
151
152    return (NULL);
153  }
154
155 /*
156  * If cur_node == 0, then find the first matching node...
157  */
158
159  if (ind->cur_node == 0)
160  {
161   /*
162    * Find the first node using a modified binary search algorithm...
163    */
164
165    first = 0;
166    last  = ind->num_nodes - 1;
167
168#ifdef DEBUG
169    printf("    find first time, num_nodes=%d...\n", ind->num_nodes);
170#endif /* DEBUG */
171
172    while ((last - first) > 1)
173    {
174      current = (first + last) / 2;
175
176#ifdef DEBUG
177      printf("    first=%d, last=%d, current=%d\n", first, last, current);
178#endif /* DEBUG */
179
180      if ((diff = index_find(ind, element, value, ind->nodes[current])) == 0)
181      {
182       /*
183        * Found a match, move back to find the first...
184	*/
185
186#ifdef DEBUG
187        puts("    match!");
188#endif /* DEBUG */
189
190        while (current > 0 &&
191	       !index_find(ind, element, value, ind->nodes[current - 1]))
192	  current --;
193
194#ifdef DEBUG
195        printf("    returning first match=%d\n", current);
196#endif /* DEBUG */
197
198       /*
199        * Return the first match and save the index to the next...
200	*/
201
202        ind->cur_node = current + 1;
203
204	return (ind->nodes[current]);
205      }
206      else if (diff < 0)
207	last = current;
208      else
209	first = current;
210
211#ifdef DEBUG
212      printf("    diff=%d\n", diff);
213#endif /* DEBUG */
214    }
215
216   /*
217    * If we get this far, then we found exactly 0 or 1 matches...
218    */
219
220    for (current = first; current <= last; current ++)
221      if (!index_find(ind, element, value, ind->nodes[current]))
222      {
223       /*
224	* Found exactly one (or possibly two) match...
225	*/
226
227#ifdef DEBUG
228	printf("    returning only match %d...\n", current);
229#endif /* DEBUG */
230
231	ind->cur_node = current + 1;
232
233	return (ind->nodes[current]);
234      }
235
236   /*
237    * No matches...
238    */
239
240    ind->cur_node = ind->num_nodes;
241
242#ifdef DEBUG
243    puts("    returning NULL...");
244#endif /* DEBUG */
245
246    return (NULL);
247  }
248  else if (ind->cur_node < ind->num_nodes &&
249           !index_find(ind, element, value, ind->nodes[ind->cur_node]))
250  {
251   /*
252    * Return the next matching node...
253    */
254
255#ifdef DEBUG
256    printf("    returning next match %d...\n", ind->cur_node);
257#endif /* DEBUG */
258
259    return (ind->nodes[ind->cur_node ++]);
260  }
261
262 /*
263  * If we get this far, then we have no matches...
264  */
265
266  ind->cur_node = ind->num_nodes;
267
268#ifdef DEBUG
269  puts("    returning NULL...");
270#endif /* DEBUG */
271
272  return (NULL);
273}
274
275
276/*
277 * 'mxmlIndexGetCount()' - Get the number of nodes in an index.
278 *
279 * @since Mini-XML 2.7@
280 */
281
282int					/* I - Number of nodes in index */
283mxmlIndexGetCount(mxml_index_t *ind)	/* I - Index of nodes */
284{
285 /*
286  * Range check input...
287  */
288
289  if (!ind)
290    return (0);
291
292 /*
293  * Return the number of nodes in the index...
294  */
295
296  return (ind->num_nodes);
297}
298
299
300/*
301 * 'mxmlIndexNew()' - Create a new index.
302 *
303 * The index will contain all nodes that contain the named element and/or
304 * attribute. If both "element" and "attr" are NULL, then the index will
305 * contain a sorted list of the elements in the node tree.  Nodes are
306 * sorted by element name and optionally by attribute value if the "attr"
307 * argument is not NULL.
308 */
309
310mxml_index_t *				/* O - New index */
311mxmlIndexNew(mxml_node_t *node,		/* I - XML node tree */
312             const char  *element,	/* I - Element to index or NULL for all */
313             const char  *attr)		/* I - Attribute to index or NULL for none */
314{
315  mxml_index_t	*ind;			/* New index */
316  mxml_node_t	*current,		/* Current node in index */
317  		**temp;			/* Temporary node pointer array */
318
319
320 /*
321  * Range check input...
322  */
323
324#ifdef DEBUG
325  printf("mxmlIndexNew(node=%p, element=\"%s\", attr=\"%s\")\n",
326         node, element ? element : "(null)", attr ? attr : "(null)");
327#endif /* DEBUG */
328
329  if (!node)
330    return (NULL);
331
332 /*
333  * Create a new index...
334  */
335
336  if ((ind = calloc(1, sizeof(mxml_index_t))) == NULL)
337  {
338    mxml_error("Unable to allocate %d bytes for index - %s",
339               sizeof(mxml_index_t), strerror(errno));
340    return (NULL);
341  }
342
343  if (attr)
344    ind->attr = strdup(attr);
345
346  if (!element && !attr)
347    current = node;
348  else
349    current = mxmlFindElement(node, node, element, attr, NULL, MXML_DESCEND);
350
351  while (current)
352  {
353    if (ind->num_nodes >= ind->alloc_nodes)
354    {
355      if (!ind->alloc_nodes)
356        temp = malloc(64 * sizeof(mxml_node_t *));
357      else
358        temp = realloc(ind->nodes, (ind->alloc_nodes + 64) * sizeof(mxml_node_t *));
359
360      if (!temp)
361      {
362       /*
363        * Unable to allocate memory for the index, so abort...
364	*/
365
366        mxml_error("Unable to allocate %d bytes for index: %s",
367	           (ind->alloc_nodes + 64) * sizeof(mxml_node_t *),
368		   strerror(errno));
369
370        mxmlIndexDelete(ind);
371	return (NULL);
372      }
373
374      ind->nodes       = temp;
375      ind->alloc_nodes += 64;
376    }
377
378    ind->nodes[ind->num_nodes ++] = current;
379
380    current = mxmlFindElement(current, node, element, attr, NULL, MXML_DESCEND);
381  }
382
383 /*
384  * Sort nodes based upon the search criteria...
385  */
386
387#ifdef DEBUG
388  {
389    int i;				/* Looping var */
390
391
392    printf("%d node(s) in index.\n\n", ind->num_nodes);
393
394    if (attr)
395    {
396      printf("Node      Address   Element         %s\n", attr);
397      puts("--------  --------  --------------  ------------------------------");
398
399      for (i = 0; i < ind->num_nodes; i ++)
400	printf("%8d  %-8p  %-14.14s  %s\n", i, ind->nodes[i],
401	       ind->nodes[i]->value.element.name,
402	       mxmlElementGetAttr(ind->nodes[i], attr));
403    }
404    else
405    {
406      puts("Node      Address   Element");
407      puts("--------  --------  --------------");
408
409      for (i = 0; i < ind->num_nodes; i ++)
410	printf("%8d  %-8p  %s\n", i, ind->nodes[i],
411	       ind->nodes[i]->value.element.name);
412    }
413
414    putchar('\n');
415  }
416#endif /* DEBUG */
417
418  if (ind->num_nodes > 1)
419    index_sort(ind, 0, ind->num_nodes - 1);
420
421#ifdef DEBUG
422  {
423    int i;				/* Looping var */
424
425
426    puts("After sorting:\n");
427
428    if (attr)
429    {
430      printf("Node      Address   Element         %s\n", attr);
431      puts("--------  --------  --------------  ------------------------------");
432
433      for (i = 0; i < ind->num_nodes; i ++)
434	printf("%8d  %-8p  %-14.14s  %s\n", i, ind->nodes[i],
435	       ind->nodes[i]->value.element.name,
436	       mxmlElementGetAttr(ind->nodes[i], attr));
437    }
438    else
439    {
440      puts("Node      Address   Element");
441      puts("--------  --------  --------------");
442
443      for (i = 0; i < ind->num_nodes; i ++)
444	printf("%8d  %-8p  %s\n", i, ind->nodes[i],
445	       ind->nodes[i]->value.element.name);
446    }
447
448    putchar('\n');
449  }
450#endif /* DEBUG */
451
452 /*
453  * Return the new index...
454  */
455
456  return (ind);
457}
458
459
460/*
461 * 'mxmlIndexReset()' - Reset the enumeration/find pointer in the index and
462 *                      return the first node in the index.
463 *
464 * This function should be called prior to using mxmlIndexEnum() or
465 * mxmlIndexFind() for the first time.
466 */
467
468mxml_node_t *				/* O - First node or NULL if there is none */
469mxmlIndexReset(mxml_index_t *ind)	/* I - Index to reset */
470{
471#ifdef DEBUG
472  printf("mxmlIndexReset(ind=%p)\n", ind);
473#endif /* DEBUG */
474
475 /*
476  * Range check input...
477  */
478
479  if (!ind)
480    return (NULL);
481
482 /*
483  * Set the index to the first element...
484  */
485
486  ind->cur_node = 0;
487
488 /*
489  * Return the first node...
490  */
491
492  if (ind->num_nodes)
493    return (ind->nodes[0]);
494  else
495    return (NULL);
496}
497
498
499/*
500 * 'index_compare()' - Compare two nodes.
501 */
502
503static int				/* O - Result of comparison */
504index_compare(mxml_index_t *ind,	/* I - Index */
505              mxml_node_t  *first,	/* I - First node */
506              mxml_node_t  *second)	/* I - Second node */
507{
508  int	diff;				/* Difference */
509
510
511 /*
512  * Check the element name...
513  */
514
515  if ((diff = strcmp(first->value.element.name,
516                     second->value.element.name)) != 0)
517    return (diff);
518
519 /*
520  * Check the attribute value...
521  */
522
523  if (ind->attr)
524  {
525    if ((diff = strcmp(mxmlElementGetAttr(first, ind->attr),
526                       mxmlElementGetAttr(second, ind->attr))) != 0)
527      return (diff);
528  }
529
530 /*
531  * No difference, return 0...
532  */
533
534  return (0);
535}
536
537
538/*
539 * 'index_find()' - Compare a node with index values.
540 */
541
542static int				/* O - Result of comparison */
543index_find(mxml_index_t *ind,		/* I - Index */
544           const char   *element,	/* I - Element name or NULL */
545	   const char   *value,		/* I - Attribute value or NULL */
546           mxml_node_t  *node)		/* I - Node */
547{
548  int	diff;				/* Difference */
549
550
551 /*
552  * Check the element name...
553  */
554
555  if (element)
556  {
557    if ((diff = strcmp(element, node->value.element.name)) != 0)
558      return (diff);
559  }
560
561 /*
562  * Check the attribute value...
563  */
564
565  if (value)
566  {
567    if ((diff = strcmp(value, mxmlElementGetAttr(node, ind->attr))) != 0)
568      return (diff);
569  }
570
571 /*
572  * No difference, return 0...
573  */
574
575  return (0);
576}
577
578
579/*
580 * 'index_sort()' - Sort the nodes in the index...
581 *
582 * This function implements the classic quicksort algorithm...
583 */
584
585static void
586index_sort(mxml_index_t *ind,		/* I - Index to sort */
587           int          left,		/* I - Left node in partition */
588	   int          right)		/* I - Right node in partition */
589{
590  mxml_node_t	*pivot,			/* Pivot node */
591		*temp;			/* Swap node */
592  int		templ,			/* Temporary left node */
593		tempr;			/* Temporary right node */
594
595
596 /*
597  * Loop until we have sorted all the way to the right...
598  */
599
600  do
601  {
602   /*
603    * Sort the pivot in the current partition...
604    */
605
606    pivot = ind->nodes[left];
607
608    for (templ = left, tempr = right; templ < tempr;)
609    {
610     /*
611      * Move left while left node <= pivot node...
612      */
613
614      while ((templ < right) &&
615             index_compare(ind, ind->nodes[templ], pivot) <= 0)
616	templ ++;
617
618     /*
619      * Move right while right node > pivot node...
620      */
621
622      while ((tempr > left) &&
623             index_compare(ind, ind->nodes[tempr], pivot) > 0)
624	tempr --;
625
626     /*
627      * Swap nodes if needed...
628      */
629
630      if (templ < tempr)
631      {
632	temp              = ind->nodes[templ];
633	ind->nodes[templ] = ind->nodes[tempr];
634	ind->nodes[tempr] = temp;
635      }
636    }
637
638   /*
639    * When we get here, the right (tempr) node is the new position for the
640    * pivot node...
641    */
642
643    if (index_compare(ind, pivot, ind->nodes[tempr]) > 0)
644    {
645      ind->nodes[left]  = ind->nodes[tempr];
646      ind->nodes[tempr] = pivot;
647    }
648
649   /*
650    * Recursively sort the left partition as needed...
651    */
652
653    if (left < (tempr - 1))
654      index_sort(ind, left, tempr - 1);
655  }
656  while (right > (left = tempr + 1));
657}
658
659
660/*
661 * End of "$Id: mxml-index.c 426 2011-01-01 23:42:17Z mike $".
662 */
663