1/* 2 * Permission is hereby granted, free of charge, to any person obtaining a copy of 3 * this software and associated documentation files (the "Software"), to deal in 4 * the Software without restriction, including without limitation the rights to 5 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 6 * of the Software, and to permit persons to whom the Software is furnished to do 7 * so, subject to the following conditions: 8 * 9 * The above copyright notice and this permission notice shall be included in all 10 * copies or substantial portions of the Software. 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 15 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 16 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 17 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 18 * SOFTWARE. 19 */ 20package jdk.nashorn.internal.runtime.regexp.joni; 21 22import jdk.nashorn.internal.runtime.regexp.joni.constants.AnchorType; 23import jdk.nashorn.internal.runtime.regexp.joni.constants.RegexState; 24import jdk.nashorn.internal.runtime.regexp.joni.exception.ErrorMessages; 25import jdk.nashorn.internal.runtime.regexp.joni.exception.ValueException; 26 27@SuppressWarnings("javadoc") 28public final class Regex implements RegexState { 29 30 int[] code; /* compiled pattern */ 31 int codeLength; 32 boolean stackNeeded; 33 Object[] operands; /* e.g. shared CClassNode */ 34 int operandLength; 35 36 int numMem; /* used memory(...) num counted from 1 */ 37 int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ 38 int numNullCheck; /* OP_NULL_CHECK_START/END id counter */ 39 int captureHistory; /* (?@...) flag (1-31) */ 40 int btMemStart; /* need backtrack flag */ 41 int btMemEnd; /* need backtrack flag */ 42 43 int stackPopLevel; 44 45 int[] repeatRangeLo; 46 int[] repeatRangeHi; 47 48 WarnCallback warnings; 49 MatcherFactory factory; 50 protected Analyser analyser; 51 52 int options; 53 final int caseFoldFlag; 54 55 /* optimization info (string search, char-map and anchors) */ 56 SearchAlgorithm searchAlgorithm; /* optimize flag */ 57 int thresholdLength; /* search str-length for apply optimize */ 58 int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ 59 int anchorDmin; /* (SEMI_)END_BUF anchor distance */ 60 int anchorDmax; /* (SEMI_)END_BUF anchor distance */ 61 int subAnchor; /* start-anchor for exact or map */ 62 63 char[] exact; 64 int exactP; 65 int exactEnd; 66 67 byte[] map; /* used as BM skip or char-map */ 68 int[] intMap; /* BM skip for exact_len > 255 */ 69 int[] intMapBackward; /* BM skip for backward search */ 70 int dMin; /* min-distance of exact or map */ 71 int dMax; /* max-distance of exact or map */ 72 73 char[][] templates; 74 int templateNum; 75 76 public Regex(final CharSequence cs) { 77 this(cs.toString()); 78 } 79 80 public Regex(final String str) { 81 this(str.toCharArray(), 0, str.length(), 0); 82 } 83 84 public Regex(final char[] chars) { 85 this(chars, 0, chars.length, 0); 86 } 87 88 public Regex(final char[] chars, final int p, final int end) { 89 this(chars, p, end, 0); 90 } 91 92 public Regex(final char[] chars, final int p, final int end, final int option) { 93 this(chars, p, end, option, Syntax.RUBY, WarnCallback.DEFAULT); 94 } 95 96 // onig_new 97 public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax) { 98 this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, WarnCallback.DEFAULT); 99 } 100 101 public Regex(final char[]chars, final int p, final int end, final int option, final WarnCallback warnings) { 102 this(chars, p, end, option, Syntax.RUBY, warnings); 103 } 104 105 // onig_new 106 public Regex(final char[] chars, final int p, final int end, final int option, final Syntax syntax, final WarnCallback warnings) { 107 this(chars, p, end, option, Config.ENC_CASE_FOLD_DEFAULT, syntax, warnings); 108 } 109 110 // onig_alloc_init 111 public Regex(final char[] chars, final int p, final int end, final int optionp, final int caseFoldFlag, final Syntax syntax, final WarnCallback warnings) { 112 int option = optionp; 113 114 if ((option & (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) == 115 (Option.DONT_CAPTURE_GROUP | Option.CAPTURE_GROUP)) { 116 throw new ValueException(ErrorMessages.ERR_INVALID_COMBINATION_OF_OPTIONS); 117 } 118 119 if ((option & Option.NEGATE_SINGLELINE) != 0) { 120 option |= syntax.options; 121 option &= ~Option.SINGLELINE; 122 } else { 123 option |= syntax.options; 124 } 125 126 this.options = option; 127 this.caseFoldFlag = caseFoldFlag; 128 this.warnings = warnings; 129 130 new Analyser(new ScanEnvironment(this, syntax), chars, p, end).compile(); 131 132 this.warnings = null; 133 } 134 135 public Matcher matcher(final char[] chars) { 136 return matcher(chars, 0, chars.length); 137 } 138 139 public Matcher matcher(final char[] chars, final int p, final int end) { 140 return factory.create(this, chars, p, end); 141 } 142 143 public WarnCallback getWarnings() { 144 return warnings; 145 } 146 147 public int numberOfCaptures() { 148 return numMem; 149 } 150 151 /* set skip map for Boyer-Moor search */ 152 void setupBMSkipMap() { 153 final char[] chars = exact; 154 final int p = exactP; 155 final int end = exactEnd; 156 final int len = end - p; 157 158 if (len < Config.CHAR_TABLE_SIZE) { 159 // map/skip 160 if (map == null) { 161 map = new byte[Config.CHAR_TABLE_SIZE]; 162 } 163 164 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { 165 map[i] = (byte)len; 166 } 167 for (int i=0; i<len-1; i++) 168 { 169 map[chars[p + i] & 0xff] = (byte)(len - 1 -i); // oxff ?? 170 } 171 } else { 172 if (intMap == null) { 173 intMap = new int[Config.CHAR_TABLE_SIZE]; 174 } 175 176 for (int i=0; i<len-1; i++) 177 { 178 intMap[chars[p + i] & 0xff] = len - 1 - i; // oxff ?? 179 } 180 } 181 } 182 183 void setExactInfo(final OptExactInfo e) { 184 if (e.length == 0) { 185 return; 186 } 187 188 // shall we copy that ? 189 exact = e.chars; 190 exactP = 0; 191 exactEnd = e.length; 192 193 if (e.ignoreCase) { 194 searchAlgorithm = new SearchAlgorithm.SLOW_IC(this); 195 } else { 196 if (e.length >= 2) { 197 setupBMSkipMap(); 198 searchAlgorithm = SearchAlgorithm.BM; 199 } else { 200 searchAlgorithm = SearchAlgorithm.SLOW; 201 } 202 } 203 204 dMin = e.mmd.min; 205 dMax = e.mmd.max; 206 207 if (dMin != MinMaxLen.INFINITE_DISTANCE) { 208 thresholdLength = dMin + (exactEnd - exactP); 209 } 210 } 211 212 void setOptimizeMapInfo(final OptMapInfo m) { 213 map = m.map; 214 215 searchAlgorithm = SearchAlgorithm.MAP; 216 dMin = m.mmd.min; 217 dMax = m.mmd.max; 218 219 if (dMin != MinMaxLen.INFINITE_DISTANCE) { 220 thresholdLength = dMin + 1; 221 } 222 } 223 224 void setSubAnchor(final OptAnchorInfo anc) { 225 subAnchor |= anc.leftAnchor & AnchorType.BEGIN_LINE; 226 subAnchor |= anc.rightAnchor & AnchorType.END_LINE; 227 } 228 229 void clearOptimizeInfo() { 230 searchAlgorithm = SearchAlgorithm.NONE; 231 anchor = 0; 232 anchorDmax = 0; 233 anchorDmin = 0; 234 subAnchor = 0; 235 236 exact = null; 237 exactP = exactEnd = 0; 238 } 239 240 public String optimizeInfoToString() { 241 final StringBuilder s = new StringBuilder(); 242 s.append("optimize: ").append(searchAlgorithm.getName()).append("\n"); 243 s.append(" anchor: ").append(OptAnchorInfo.anchorToString(anchor)); 244 245 if ((anchor & AnchorType.END_BUF_MASK) != 0) { 246 s.append(MinMaxLen.distanceRangeToString(anchorDmin, anchorDmax)); 247 } 248 249 s.append("\n"); 250 251 if (searchAlgorithm != SearchAlgorithm.NONE) { 252 s.append(" sub anchor: ").append(OptAnchorInfo.anchorToString(subAnchor)).append("\n"); 253 } 254 255 s.append("dmin: ").append(dMin).append(" dmax: ").append(dMax).append("\n"); 256 s.append("threshold length: ").append(thresholdLength).append("\n"); 257 258 if (exact != null) { 259 s.append("exact: [").append(exact, exactP, exactEnd - exactP).append("]: length: ").append(exactEnd - exactP).append("\n"); 260 } else if (searchAlgorithm == SearchAlgorithm.MAP) { 261 int n=0; 262 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { 263 if (map[i] != 0) { 264 n++; 265 } 266 } 267 268 s.append("map: n = ").append(n).append("\n"); 269 if (n > 0) { 270 int c=0; 271 s.append("["); 272 for (int i=0; i<Config.CHAR_TABLE_SIZE; i++) { 273 if (map[i] != 0) { 274 if (c > 0) { 275 s.append(", "); 276 } 277 c++; 278 // TODO if (enc.isPrint(i) 279 s.append((char)i); 280 } 281 } 282 s.append("]\n"); 283 } 284 } 285 286 return s.toString(); 287 } 288 289 public int getOptions() { 290 return options; 291 } 292 293 public String dumpTree() { 294 return analyser == null ? null : analyser.root.toString(); 295 } 296 297 public String dumpByteCode() { 298 return new ByteCodePrinter(this).byteCodeListToString(); 299 } 300 301} 302