1# 2# Copyright 2020, Data61, CSIRO (ABN 41 687 119 230) 3# 4# SPDX-License-Identifier: BSD-2-Clause 5# 6 7from __future__ import print_function 8 9 10class BracedString: 11 """A string split into components based on delimiters (usually braces). 12 13 When l occurs in the string, create a new component whose contents are 14 the rest of the string until the matching r. 15 16 When l = ( and r = ), this has the approximate behavior of splitting 17 the string into the components of a Haskell function application, 18 where each individual component, if not containing the delimiters, can 19 be split on white space to determine the arguments of the function. 20 21 This behaves exactly like a str, except for split, map, and 22 discard_enclosing_braces. 23 24 Invariant: a component either has no delimiters, or is surrounded by 25 delimiters. 26 """ 27 28 def __init__(self, s, l, r, bits=None): 29 if bits is None: 30 bits = self._get_bits(s, l, r) 31 self.bits = bits 32 self.s = s 33 self.l = l 34 self.r = r 35 36 def _get_bits(self, s, l, r): 37 nesting_depth = 0 38 bits = [''] 39 for c in s: 40 if c == l: 41 if nesting_depth == 0: 42 if bits[-1]: 43 bits.append('') 44 nesting_depth = nesting_depth + 1 45 bits[-1] = bits[-1] + c 46 if c == r: 47 nesting_depth = nesting_depth - 1 48 if nesting_depth == 0: 49 if bits[-1]: 50 bits.append('') 51 if not bits[-1]: 52 bits.pop(-1) 53 return bits 54 55 def __str__(self): 56 return self.s 57 58 def __repr__(self): 59 check = BracedString(self.s, self.l, self.r) 60 if check.bits == self.bits: 61 return 'B%s%s: %r' % (self.l, self.r, self.s) 62 else: 63 return 'Broken Braced: %r, %r, %r' % (self.s, self.bits, 64 check.bits) 65 66 def __add__(self, other): 67 if isinstance(other, BracedString): 68 if self.bits[-1].startswith(self.l): 69 bits = self.bits + other.bits 70 elif other.bits[0].startswith(self.l): 71 bits = self.bits + other.bits 72 else: 73 bits = self.bits[:-1] + \ 74 [self.bits[-1] + other.bits[0]] + \ 75 other.bits[1:] 76 return BracedString(self.s + other.s, self.l, self.r, bits) 77 78 return BracedString(self.s + other, self.l, self.r) 79 80 def __eq__(self, other): 81 return other == self.s 82 83 def __ne__(self, other): 84 return other != self.s 85 86 def __iter__(self): 87 return iter(self.s) 88 89 def __getitem__(self, n): 90 return self.s[n] 91 92 def __getslice__(self, i, j): 93 return self.s.__getslice__(i, j) 94 95 def __len__(self): 96 return len(self.s) 97 98 def split(self, str=None, num=-2, braces=False): 99 """Split into multiple BracedStrings, using `str` as a delimiter, and 100 into a maximum of `num` components. 101 102 If `braces` is true (defaults to false), braces will also count as a 103 delimiter, and each braced component will become a single element of 104 the output. 105 106 Otherwise, each braced pair will not be split into a separate 107 component, but splitting will ignore the contents inside the 108 delimiter. 109 """ 110 if braces: 111 bits = [] 112 bbs = [] 113 for bit in self.bits: 114 d = num + 1 - len(bits) 115 if d == 0: 116 bits[-1] = bits[-1] + bit 117 bbs[-1].append(bit) 118 elif bit.startswith(self.l): 119 bits.append(bit) 120 bbs.append([bit]) 121 else: 122 if num == -2: 123 n_bits = bit.split(str) 124 else: 125 n_bits = bit.split(str, d) 126 bits.extend(n_bits) 127 bbs.extend([[b] for b in n_bits]) 128 else: 129 # s is the original string, but with delimited substrings replaced 130 # with just the delimiters 131 s = '' 132 internals = [] 133 for bit in self.bits: 134 if bit.startswith(self.l): 135 s = s + self.l + self.r 136 internals.append(bit) 137 else: 138 s = s + bit 139 # split on the thing, secure in the knowledge that it won't mess 140 # up things inside delimiters. 141 bits1 = s.split(str, num) 142 bits = [] 143 bbs = [] 144 for bit in bits1: 145 # Invariant: if self.{l,r} not in bit, bit remains whole. 146 147 # split on delimiters, which we inserted earlier 148 bits2 = bit.split(self.l + self.r) 149 meshed = [bits2.pop(0)] 150 while bits2: 151 # If this list has more elements, then we need to insert, 152 # where each delimiter pair was, the corresponding 153 # contents which we stored in `internals`. 154 meshed.append(internals.pop(0)) 155 # then we add in the next component of the string, which 156 # was after that delimiter pair. 157 meshed.append(bits2.pop(0)) 158 # remove empty strings 159 meshed = [s for s in meshed if s != ''] 160 bbs.append(meshed) 161 bits.append(''.join(meshed)) 162 163 return [BracedString(bit, self.l, self.r, bbs[i]) 164 for i, bit in enumerate(bits)] 165 166 def startswith(self, s): 167 return self.s.startswith(s) 168 169 def endswith(self, s): 170 return self.s.endswith(s) 171 172 def map(self, fn): 173 """Apply a function to each component of this braced string. 174 175 For delimited components, the delimiters will not be passed to the 176 function. 177 """ 178 new_s = '' 179 new_bits = [] 180 181 for bit in self.bits: 182 if bit.startswith(self.l): 183 new = fn(bit[1:-1]) 184 new = self.l + new + self.r 185 new_s = new_s + new 186 new_bits.append(new) 187 else: 188 new_s = new_s + bit 189 new_bits.append(bit) 190 191 return BracedString(new_s, self.l, self.r, new_bits) 192 193 def discard_enclosing_braces(self): 194 """If the string consists of one braced expression, 195 discard the redundant enclosing braces. Otherwise 196 return the string.""" 197 if len(self.bits) > 1: 198 return self 199 200 [bit] = self.bits 201 202 if bit.startswith(self.l): 203 return BracedString(bit[1:-1], self.l, self.r) 204 else: 205 return self 206 207 208def clone(str, obj): 209 if isinstance(obj, BracedString): 210 return BracedString(str.__str__(), obj.l, obj.r) 211 else: 212 return str 213 214 215str = BracedString 216 217if __name__ == '__main__': 218 x = BracedString('a => b => c => (d => (e, f))', '(', ')') 219 print(x.split('=>')) 220 print(x.split(',')) 221 print(1, x.split('=>', 1)) 222 print(2, x.split('=>', 2)) 223 print(3, x.split('=>', 3)) 224 print([y.split() for y in x.split('=>')]) 225