1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 23import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 24 25@SuppressWarnings("javadoc") 26public final class CodeRangeBuffer implements Cloneable { 27 private static final int INIT_MULTI_BYTE_RANGE_SIZE = 5; 28 private static final int ALL_MULTI_BYTE_RANGE = 0x7fffffff; 29 30 int[] p; 31 int used; 32 33 34 public CodeRangeBuffer() { 35 p = new int[INIT_MULTI_BYTE_RANGE_SIZE]; 36 writeCodePoint(0, 0); 37 } 38 39 // CodeRange.isInCodeRange 40 public boolean isInCodeRange(final int code) { 41 int low = 0; 42 final int n = p[0]; 43 int high = n; 44 45 while (low < high) { 46 final int x = (low + high) >> 1; 47 if (code > p[(x << 1) + 2]) { 48 low = x + 1; 49 } else { 50 high = x; 51 } 52 } 53 return low < n && code >= p[(low << 1) + 1]; 54 } 55 56 private CodeRangeBuffer(final CodeRangeBuffer orig) { 57 p = new int[orig.p.length]; 58 System.arraycopy(orig.p, 0, p, 0, p.length); 59 used = orig.used; 60 } 61 62 @Override 63 public String toString() { 64 final StringBuilder buf = new StringBuilder(); 65 buf.append("CodeRange"); 66 buf.append("\n used: ").append(used); 67 buf.append("\n code point: ").append(p[0]); 68 buf.append("\n ranges: "); 69 70 for (int i=0; i<p[0]; i++) { 71 buf.append("[").append(rangeNumToString(p[i * 2 + 1])).append("..").append(rangeNumToString(p[i * 2 + 2])).append("]"); 72 if (i > 0 && i % 6 == 0) { 73 buf.append("\n "); 74 } 75 } 76 77 return buf.toString(); 78 } 79 80 private static String rangeNumToString(final int num){ 81 return "0x" + Integer.toString(num, 16); 82 } 83 84 public void expand(final int low) { 85 int length = p.length; 86 do { length <<= 1; } while (length < low); 87 final int[]tmp = new int[length]; 88 System.arraycopy(p, 0, tmp, 0, used); 89 p = tmp; 90 } 91 92 public void ensureSize(final int size) { 93 int length = p.length; 94 while (length < size ) { length <<= 1; } 95 if (p.length != length) { 96 final int[]tmp = new int[length]; 97 System.arraycopy(p, 0, tmp, 0, used); 98 p = tmp; 99 } 100 } 101 102 private void moveRight(final int from, final int to, final int n) { 103 if (to + n > p.length) { 104 expand(to + n); 105 } 106 System.arraycopy(p, from, p, to, n); 107 if (to + n > used) { 108 used = to + n; 109 } 110 } 111 112 protected void moveLeft(final int from, final int to, final int n) { 113 System.arraycopy(p, from, p, to, n); 114 } 115 116 private void moveLeftAndReduce(final int from, final int to) { 117 System.arraycopy(p, from, p, to, used - from); 118 used -= from - to; 119 } 120 121 public void writeCodePoint(final int pos, final int b) { 122 final int u = pos + 1; 123 if (p.length < u) { 124 expand(u); 125 } 126 p[pos] = b; 127 if (used < u) { 128 used = u; 129 } 130 } 131 132 @Override 133 public CodeRangeBuffer clone() { 134 return new CodeRangeBuffer(this); 135 } 136 137 // ugly part: these methods should be made OO 138 // add_code_range_to_buf 139 public static CodeRangeBuffer addCodeRangeToBuff(final CodeRangeBuffer pbufp, final int fromp, final int top) { 140 int from = fromp, to = top; 141 CodeRangeBuffer pbuf = pbufp; 142 143 if (from > to) { 144 final int n = from; 145 from = to; 146 to = n; 147 } 148 149 if (pbuf == null) { 150 pbuf = new CodeRangeBuffer(); // move to CClassNode 151 } 152 153 final int[]p = pbuf.p; 154 int n = p[0]; 155 156 int low = 0; 157 int bound = n; 158 159 while (low < bound) { 160 final int x = (low + bound) >>> 1; 161 if (from > p[x * 2 + 2]) { 162 low = x + 1; 163 } else { 164 bound = x; 165 } 166 } 167 168 int high = low; 169 bound = n; 170 171 while (high < bound) { 172 final int x = (high + bound) >>> 1; 173 if (to >= p[x * 2 + 1] - 1) { 174 high = x + 1; 175 } else { 176 bound = x; 177 } 178 } 179 180 final int incN = low + 1 - high; 181 182 if (n + incN > Config.MAX_MULTI_BYTE_RANGES_NUM) { 183 throw new ValueException(ErrorMessages.ERR_TOO_MANY_MULTI_BYTE_RANGES); 184 } 185 186 if (incN != 1) { 187 if (from > p[low * 2 + 1]) { 188 from = p[low * 2 + 1]; 189 } 190 if (to < p[(high - 1) * 2 + 2]) { 191 to = p[(high - 1) * 2 + 2]; 192 } 193 } 194 195 if (incN != 0 && high < n) { 196 final int fromPos = 1 + high * 2; 197 final int toPos = 1 + (low + 1) * 2; 198 final int size = (n - high) * 2; 199 200 if (incN > 0) { 201 pbuf.moveRight(fromPos, toPos, size); 202 } else { 203 pbuf.moveLeftAndReduce(fromPos, toPos); 204 } 205 } 206 207 final int pos = 1 + low * 2; 208 // pbuf.ensureSize(pos + 2); 209 pbuf.writeCodePoint(pos, from); 210 pbuf.writeCodePoint(pos + 1, to); 211 n += incN; 212 pbuf.writeCodePoint(0, n); 213 214 return pbuf; 215 } 216 217 // add_code_range, be aware of it returning null! 218 public static CodeRangeBuffer addCodeRange(final CodeRangeBuffer pbuf, final ScanEnvironment env, final int from, final int to) { 219 if (from > to) { 220 if (env.syntax.allowEmptyRangeInCC()) { 221 return pbuf; 222 } 223 throw new ValueException(ErrorMessages.ERR_EMPTY_RANGE_IN_CHAR_CLASS); 224 } 225 return addCodeRangeToBuff(pbuf, from, to); 226 } 227 228 // SET_ALL_MULTI_BYTE_RANGE 229 protected static CodeRangeBuffer setAllMultiByteRange(final CodeRangeBuffer pbuf) { 230 return addCodeRangeToBuff(pbuf, EncodingHelper.mbcodeStartPosition(), ALL_MULTI_BYTE_RANGE); 231 } 232 233 // ADD_ALL_MULTI_BYTE_RANGE 234 public static CodeRangeBuffer addAllMultiByteRange(final CodeRangeBuffer pbuf) { 235 return setAllMultiByteRange(pbuf); 236 } 237 238 // not_code_range_buf 239 public static CodeRangeBuffer notCodeRangeBuff(final CodeRangeBuffer bbuf) { 240 CodeRangeBuffer pbuf = null; 241 242 if (bbuf == null) { 243 return setAllMultiByteRange(pbuf); 244 } 245 246 final int[]p = bbuf.p; 247 final int n = p[0]; 248 249 if (n <= 0) { 250 return setAllMultiByteRange(pbuf); 251 } 252 253 int pre = EncodingHelper.mbcodeStartPosition(); 254 255 int from; 256 int to = 0; 257 for (int i=0; i<n; i++) { 258 from = p[i * 2 + 1]; 259 to = p[i * 2 + 2]; 260 if (pre <= from - 1) { 261 pbuf = addCodeRangeToBuff(pbuf, pre, from - 1); 262 } 263 if (to == ALL_MULTI_BYTE_RANGE) { 264 break; 265 } 266 pre = to + 1; 267 } 268 269 if (to < ALL_MULTI_BYTE_RANGE) { 270 pbuf = addCodeRangeToBuff(pbuf, to + 1, ALL_MULTI_BYTE_RANGE); 271 } 272 return pbuf; 273 } 274 275 // or_code_range_buf 276 public static CodeRangeBuffer orCodeRangeBuff(final CodeRangeBuffer bbuf1p, final boolean not1p, 277 final CodeRangeBuffer bbuf2p, final boolean not2p) { 278 CodeRangeBuffer pbuf = null; 279 CodeRangeBuffer bbuf1 = bbuf1p; 280 CodeRangeBuffer bbuf2 = bbuf2p; 281 boolean not1 = not1p; 282 boolean not2 = not2p; 283 284 if (bbuf1 == null && bbuf2 == null) { 285 if (not1 || not2) { 286 return setAllMultiByteRange(pbuf); 287 } 288 return null; 289 } 290 291 if (bbuf2 == null) { 292 CodeRangeBuffer tbuf; 293 boolean tnot; 294 // swap 295 tnot = not1; not1 = not2; not2 = tnot; 296 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; 297 } 298 299 if (bbuf1 == null) { 300 if (not1) { 301 return setAllMultiByteRange(pbuf); 302 } 303 if (!not2) { 304 return bbuf2.clone(); 305 } 306 return notCodeRangeBuff(bbuf2); 307 } 308 309 if (not1) { 310 CodeRangeBuffer tbuf; 311 boolean tnot; 312 // swap 313 tnot = not1; not1 = not2; not2 = tnot; 314 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; 315 } 316 317 if (!not2 && !not1) { /* 1 OR 2 */ 318 pbuf = bbuf2.clone(); 319 } else if (!not1) { /* 1 OR (not 2) */ 320 pbuf = notCodeRangeBuff(bbuf2); 321 } 322 323 final int[]p1 = bbuf1.p; 324 final int n1 = p1[0]; 325 326 for (int i=0; i<n1; i++) { 327 final int from = p1[i * 2 + 1]; 328 final int to = p1[i * 2 + 2]; 329 pbuf = addCodeRangeToBuff(pbuf, from, to); 330 } 331 332 return pbuf; 333 } 334 335 // and_code_range1 336 public static CodeRangeBuffer andCodeRange1(final CodeRangeBuffer pbufp, final int from1p, final int to1p, final int[]data, final int n) { 337 CodeRangeBuffer pbuf = pbufp; 338 int from1 = from1p, to1 = to1p; 339 340 for (int i=0; i<n; i++) { 341 final int from2 = data[i * 2 + 1]; 342 final int to2 = data[i * 2 + 2]; 343 if (from2 < from1) { 344 if (to2 < from1) { 345 continue; 346 } 347 from1 = to2 + 1; 348 } else if (from2 <= to1) { 349 if (to2 < to1) { 350 if (from1 <= from2 - 1) { 351 pbuf = addCodeRangeToBuff(pbuf, from1, from2 - 1); 352 } 353 from1 = to2 + 1; 354 } else { 355 to1 = from2 - 1; 356 } 357 } else { 358 from1 = from2; 359 } 360 if (from1 > to1) { 361 break; 362 } 363 } 364 365 if (from1 <= to1) { 366 pbuf = addCodeRangeToBuff(pbuf, from1, to1); 367 } 368 369 return pbuf; 370 } 371 372 // and_code_range_buf 373 public static CodeRangeBuffer andCodeRangeBuff(final CodeRangeBuffer bbuf1p, final boolean not1p, 374 final CodeRangeBuffer bbuf2p, final boolean not2p) { 375 CodeRangeBuffer pbuf = null; 376 CodeRangeBuffer bbuf1 = bbuf1p; 377 CodeRangeBuffer bbuf2 = bbuf2p; 378 boolean not1 = not1p, not2 = not2p; 379 380 if (bbuf1 == null) { 381 if (not1 && bbuf2 != null) { 382 return bbuf2.clone(); /* not1 != 0 -> not2 == 0 */ 383 } 384 return null; 385 } else if (bbuf2 == null) { 386 if (not2) { 387 return bbuf1.clone(); 388 } 389 return null; 390 } 391 392 if (not1) { 393 CodeRangeBuffer tbuf; 394 boolean tnot; 395 // swap 396 tnot = not1; not1 = not2; not2 = tnot; 397 tbuf = bbuf1; bbuf1 = bbuf2; bbuf2 = tbuf; 398 } 399 400 final int[]p1 = bbuf1.p; 401 final int n1 = p1[0]; 402 final int[]p2 = bbuf2.p; 403 final int n2 = p2[0]; 404 405 if (!not2 && !not1) { /* 1 AND 2 */ 406 for (int i=0; i<n1; i++) { 407 final int from1 = p1[i * 2 + 1]; 408 final int to1 = p1[i * 2 + 2]; 409 410 for (int j=0; j<n2; j++) { 411 final int from2 = p2[j * 2 + 1]; 412 final int to2 = p2[j * 2 + 2]; 413 414 if (from2 > to1) { 415 break; 416 } 417 if (to2 < from1) { 418 continue; 419 } 420 final int from = from1 > from2 ? from1 : from2; 421 final int to = to1 < to2 ? to1 : to2; 422 pbuf = addCodeRangeToBuff(pbuf, from, to); 423 } 424 } 425 } else if (!not1) { /* 1 AND (not 2) */ 426 for (int i=0; i<n1; i++) { 427 final int from1 = p1[i * 2 + 1]; 428 final int to1 = p1[i * 2 + 2]; 429 pbuf = andCodeRange1(pbuf, from1, to1, p2, n2); 430 } 431 } 432 433 return pbuf; 434 } 435} 436