1#
2# Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
3#
4# SPDX-License-Identifier: BSD-2-Clause
5#
6
7from __future__ import print_function
8
9
10class BracedString:
11    """A string split into components based on delimiters (usually braces).
12
13    When l occurs in the string, create a new component whose contents are
14    the rest of the string until the matching r.
15
16    When l = ( and r = ), this has the approximate behavior of splitting
17    the string into the components of a Haskell function application,
18    where each individual component, if not containing the delimiters, can
19    be split on white space to determine the arguments of the function.
20
21    This behaves exactly like a str, except for split, map, and
22    discard_enclosing_braces.
23
24    Invariant: a component either has no delimiters, or is surrounded by
25    delimiters.
26    """
27
28    def __init__(self, s, l, r, bits=None):
29        if bits is None:
30            bits = self._get_bits(s, l, r)
31        self.bits = bits
32        self.s = s
33        self.l = l
34        self.r = r
35
36    def _get_bits(self, s, l, r):
37        nesting_depth = 0
38        bits = ['']
39        for c in s:
40            if c == l:
41                if nesting_depth == 0:
42                    if bits[-1]:
43                        bits.append('')
44                nesting_depth = nesting_depth + 1
45            bits[-1] = bits[-1] + c
46            if c == r:
47                nesting_depth = nesting_depth - 1
48                if nesting_depth == 0:
49                    if bits[-1]:
50                        bits.append('')
51        if not bits[-1]:
52            bits.pop(-1)
53        return bits
54
55    def __str__(self):
56        return self.s
57
58    def __repr__(self):
59        check = BracedString(self.s, self.l, self.r)
60        if check.bits == self.bits:
61            return 'B%s%s: %r' % (self.l, self.r, self.s)
62        else:
63            return 'Broken Braced: %r, %r, %r' % (self.s, self.bits,
64                                                  check.bits)
65
66    def __add__(self, other):
67        if isinstance(other, BracedString):
68            if self.bits[-1].startswith(self.l):
69                bits = self.bits + other.bits
70            elif other.bits[0].startswith(self.l):
71                bits = self.bits + other.bits
72            else:
73                bits = self.bits[:-1] + \
74                    [self.bits[-1] + other.bits[0]] + \
75                    other.bits[1:]
76            return BracedString(self.s + other.s, self.l, self.r, bits)
77
78        return BracedString(self.s + other, self.l, self.r)
79
80    def __eq__(self, other):
81        return other == self.s
82
83    def __ne__(self, other):
84        return other != self.s
85
86    def __iter__(self):
87        return iter(self.s)
88
89    def __getitem__(self, n):
90        return self.s[n]
91
92    def __getslice__(self, i, j):
93        return self.s.__getslice__(i, j)
94
95    def __len__(self):
96        return len(self.s)
97
98    def split(self, str=None, num=-2, braces=False):
99        """Split into multiple BracedStrings, using `str` as a delimiter, and
100        into a maximum of `num` components.
101
102        If `braces` is true (defaults to false), braces will also count as a
103        delimiter, and each braced component will become a single element of
104        the output.
105
106        Otherwise, each braced pair will not be split into a separate
107        component, but splitting will ignore the contents inside the
108        delimiter.
109        """
110        if braces:
111            bits = []
112            bbs = []
113            for bit in self.bits:
114                d = num + 1 - len(bits)
115                if d == 0:
116                    bits[-1] = bits[-1] + bit
117                    bbs[-1].append(bit)
118                elif bit.startswith(self.l):
119                    bits.append(bit)
120                    bbs.append([bit])
121                else:
122                    if num == -2:
123                        n_bits = bit.split(str)
124                    else:
125                        n_bits = bit.split(str, d)
126                    bits.extend(n_bits)
127                    bbs.extend([[b] for b in n_bits])
128        else:
129            # s is the original string, but with delimited substrings replaced
130            # with just the delimiters
131            s = ''
132            internals = []
133            for bit in self.bits:
134                if bit.startswith(self.l):
135                    s = s + self.l + self.r
136                    internals.append(bit)
137                else:
138                    s = s + bit
139            # split on the thing, secure in the knowledge that it won't mess
140            # up things inside delimiters.
141            bits1 = s.split(str, num)
142            bits = []
143            bbs = []
144            for bit in bits1:
145                # Invariant: if self.{l,r} not in bit, bit remains whole.
146
147                # split on delimiters, which we inserted earlier
148                bits2 = bit.split(self.l + self.r)
149                meshed = [bits2.pop(0)]
150                while bits2:
151                    # If this list has more elements, then we need to insert,
152                    # where each delimiter pair was, the corresponding
153                    # contents which we stored in `internals`.
154                    meshed.append(internals.pop(0))
155                    # then we add in the next component of the string, which
156                    # was after that delimiter pair.
157                    meshed.append(bits2.pop(0))
158                # remove empty strings
159                meshed = [s for s in meshed if s != '']
160                bbs.append(meshed)
161                bits.append(''.join(meshed))
162
163        return [BracedString(bit, self.l, self.r, bbs[i])
164                for i, bit in enumerate(bits)]
165
166    def startswith(self, s):
167        return self.s.startswith(s)
168
169    def endswith(self, s):
170        return self.s.endswith(s)
171
172    def map(self, fn):
173        """Apply a function to each component of this braced string.
174
175        For delimited components, the delimiters will not be passed to the
176        function.
177        """
178        new_s = ''
179        new_bits = []
180
181        for bit in self.bits:
182            if bit.startswith(self.l):
183                new = fn(bit[1:-1])
184                new = self.l + new + self.r
185                new_s = new_s + new
186                new_bits.append(new)
187            else:
188                new_s = new_s + bit
189                new_bits.append(bit)
190
191        return BracedString(new_s, self.l, self.r, new_bits)
192
193    def discard_enclosing_braces(self):
194        """If the string consists of one braced expression,
195                discard the redundant enclosing braces. Otherwise
196                return the string."""
197        if len(self.bits) > 1:
198            return self
199
200        [bit] = self.bits
201
202        if bit.startswith(self.l):
203            return BracedString(bit[1:-1], self.l, self.r)
204        else:
205            return self
206
207
208def clone(str, obj):
209    if isinstance(obj, BracedString):
210        return BracedString(str.__str__(), obj.l, obj.r)
211    else:
212        return str
213
214
215str = BracedString
216
217if __name__ == '__main__':
218    x = BracedString('a => b => c => (d => (e, f))', '(', ')')
219    print(x.split('=>'))
220    print(x.split(','))
221    print(1, x.split('=>', 1))
222    print(2, x.split('=>', 2))
223    print(3, x.split('=>', 3))
224    print([y.split() for y in x.split('=>')])
225