1/*
2 * reserved comment block
3 * DO NOT REMOVE OR ALTER!
4 */
5package com.sun.org.apache.bcel.internal.util;
6
7import java.util.ArrayList;
8import java.util.HashMap;
9import java.util.Iterator;
10import java.util.List;
11import java.util.Locale;
12import java.util.Map;
13import java.util.regex.Matcher;
14import java.util.regex.Pattern;
15
16import com.sun.org.apache.bcel.internal.Const;
17import com.sun.org.apache.bcel.internal.generic.ClassGenException;
18import com.sun.org.apache.bcel.internal.generic.InstructionHandle;
19import com.sun.org.apache.bcel.internal.generic.InstructionList;
20
21/**
 * InstructionFinder is a tool to search for given instruction patterns, i.e.,
 * match sequences of instructions in an instruction list via regular
 * expressions. This can be used, e.g., in order to implement a peephole
 * optimizer that looks for code patterns and replaces them with faster
 * equivalents.
27 *
28 * <p>
29 * This class internally uses the java.util.regex
30 * package to search for regular expressions.
31 *
32 * A typical application would look like this:
33 *
34 * <pre>
35 *
36 *
37 *   InstructionFinder f   = new InstructionFinder(il);
38 *   String            pat = &quot;IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)&quot;;
39 *
 *   for (Iterator&lt;InstructionHandle[]&gt; i = f.search(pat, constraint); i.hasNext(); ) {
 *   InstructionHandle[] match = i.next();
42 *   ...
43 *   il.delete(match[1], match[5]);
44 *   ...
45 *   }
46 *
47 *
48 * </pre>
49 *
50 * @version $Id: InstructionFinder.java 1749603 2016-06-21 20:50:19Z ggregory $
51 * @see com.sun.org.apache.bcel.internal.generic.Instruction
52 * @see InstructionList
53 */
54public class InstructionFinder {
55
    // Added to an opcode to obtain its character; char + OFFSET is outside of LATIN-1
    private static final int OFFSET = 32767;
    // Potential number of opcodes, some are not used
    private static final int NO_OPCODES = 256;
    // Lower-case symbolic name or alias -> regular expression fragment; filled by the static initializer
    private static final Map<String, String> map = new HashMap<>();
    // The instruction list that is searched
    private final InstructionList il;
    // Instruction list as string, one character per instruction (built by reread())
    private String il_string;
    // Instruction list mapped to an array; handles[i] belongs to il_string.charAt(i)
    private InstructionHandle[] handles;


    /**
     * Creates a finder for the given instruction list and immediately reads the
     * list via {@link #reread()}.
     *
     * @param il
     *          instruction list to search for given patterns
     */
    public InstructionFinder(final InstructionList il) {
        this.il = il;
        reread();
    }
73
74
75    /**
76     * Reread the instruction list, e.g., after you've altered the list upon a
77     * match.
78     */
79    public final void reread() {
80        final int size = il.getLength();
81        final char[] buf = new char[size]; // Create a string with length equal to il length
82        handles = il.getInstructionHandles();
83        // Map opcodes to characters
84        for (int i = 0; i < size; i++) {
85            buf[i] = makeChar(handles[i].getInstruction().getOpcode());
86        }
87        il_string = new String(buf);
88    }
89
90
91    /**
92     * Map symbolic instruction names like "getfield" to a single character.
93     *
94     * @param pattern
95     *          instruction pattern in lower case
96     * @return encoded string for a pattern such as "BranchInstruction".
97     */
98    private static String mapName( final String pattern ) {
99        final String result = map.get(pattern);
100        if (result != null) {
101            return result;
102        }
103        for (short i = 0; i < NO_OPCODES; i++) {
104            if (pattern.equals(Const.getOpcodeName(i))) {
105                return "" + makeChar(i);
106            }
107        }
108        throw new RuntimeException("Instruction unknown: " + pattern);
109    }
110
111
112    /**
113     * Replace symbolic names of instructions with the appropiate character and
114     * remove all white space from string. Meta characters such as +, * are
115     * ignored.
116     *
117     * @param pattern
118     *          The pattern to compile
119     * @return translated regular expression string
120     */
121    private static String compilePattern( final String pattern ) {
122        //Bug: BCEL-77 - Instructions are assumed to be english, to avoid odd Locale issues
123        final String lower = pattern.toLowerCase(Locale.ENGLISH);
124        final StringBuilder buf = new StringBuilder();
125        final int size = pattern.length();
126        for (int i = 0; i < size; i++) {
127            char ch = lower.charAt(i);
128            if (Character.isLetterOrDigit(ch)) {
129                final StringBuilder name = new StringBuilder();
130                while ((Character.isLetterOrDigit(ch) || ch == '_') && i < size) {
131                    name.append(ch);
132                    if (++i < size) {
133                        ch = lower.charAt(i);
134                    } else {
135                        break;
136                    }
137                }
138                i--;
139                buf.append(mapName(name.toString()));
140            } else if (!Character.isWhitespace(ch)) {
141                buf.append(ch);
142            }
143        }
144        return buf.toString();
145    }
146
147
148    /**
149     * @return the matched piece of code as an array of instruction (handles)
150     */
151    private InstructionHandle[] getMatch( final int matched_from, final int match_length ) {
152        final InstructionHandle[] match = new InstructionHandle[match_length];
153        System.arraycopy(handles, matched_from, match, 0, match_length);
154        return match;
155    }
156
157
158    /**
159     * Search for the given pattern in the instruction list. You can search for
160     * any valid opcode via its symbolic name, e.g. "istore". You can also use a
161     * super class or an interface name to match a whole set of instructions, e.g.
162     * "BranchInstruction" or "LoadInstruction". "istore" is also an alias for all
163     * "istore_x" instructions. Additional aliases are "if" for "ifxx", "if_icmp"
164     * for "if_icmpxx", "if_acmp" for "if_acmpxx".
165     *
166     * Consecutive instruction names must be separated by white space which will
167     * be removed during the compilation of the pattern.
168     *
169     * For the rest the usual pattern matching rules for regular expressions
170     * apply.
171     * <P>
172     * Example pattern:
173     *
174     * <pre>
175     * search(&quot;BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*&quot;);
176     * </pre>
177     *
178     * <p>
179     * If you alter the instruction list upon a match such that other matching
180     * areas are affected, you should call reread() to update the finder and call
181     * search() again, because the matches are cached.
182     *
183     * @param pattern
184     *          the instruction pattern to search for, where case is ignored
185     * @param from
186     *          where to start the search in the instruction list
187     * @param constraint
188     *          optional CodeConstraint to check the found code pattern for
189     *          user-defined constraints
190     * @return iterator of matches where e.nextElement() returns an array of
191     *         instruction handles describing the matched area
192     */
193    public final Iterator<InstructionHandle[]> search( final String pattern,
194            final InstructionHandle from, final CodeConstraint constraint ) {
195        final String search = compilePattern(pattern);
196        int start = -1;
197        for (int i = 0; i < handles.length; i++) {
198            if (handles[i] == from) {
199                start = i; // Where to start search from (index)
200                break;
201            }
202        }
203        if (start == -1) {
204            throw new ClassGenException("Instruction handle " + from
205                    + " not found in instruction list.");
206        }
207        final Pattern regex = Pattern.compile(search);
208        final List<InstructionHandle[]> matches = new ArrayList<>();
209        final Matcher matcher = regex.matcher(il_string);
210        while (start < il_string.length() && matcher.find(start)) {
211            final int startExpr = matcher.start();
212            final int endExpr = matcher.end();
213            final int lenExpr = endExpr - startExpr;
214            final InstructionHandle[] match = getMatch(startExpr, lenExpr);
215            if ((constraint == null) || constraint.checkCode(match)) {
216                matches.add(match);
217            }
218            start = endExpr;
219        }
220        return matches.iterator();
221    }
222
223
224    /**
225     * Start search beginning from the start of the given instruction list.
226     *
227     * @param pattern
228     *          the instruction pattern to search for, where case is ignored
229     * @return iterator of matches where e.nextElement() returns an array of
230     *         instruction handles describing the matched area
231     */
232    public final Iterator<InstructionHandle[]> search( final String pattern ) {
233        return search(pattern, il.getStart(), null);
234    }
235
236
237    /**
238     * Start search beginning from `from'.
239     *
240     * @param pattern
241     *          the instruction pattern to search for, where case is ignored
242     * @param from
243     *          where to start the search in the instruction list
244     * @return iterator of matches where e.nextElement() returns an array of
245     *         instruction handles describing the matched area
246     */
247    public final Iterator<InstructionHandle[]> search( final String pattern,
248            final InstructionHandle from ) {
249        return search(pattern, from, null);
250    }
251
252
253    /**
254     * Start search beginning from the start of the given instruction list. Check
255     * found matches with the constraint object.
256     *
257     * @param pattern
258     *          the instruction pattern to search for, case is ignored
259     * @param constraint
260     *          constraints to be checked on matching code
261     * @return instruction handle or `null' if the match failed
262     */
263    public final Iterator<InstructionHandle[]> search( final String pattern,
264            final CodeConstraint constraint ) {
265        return search(pattern, il.getStart(), constraint);
266    }
267
268
269    /**
270     * Convert opcode number to char.
271     */
272    private static char makeChar( final short opcode ) {
273        return (char) (opcode + OFFSET);
274    }
275
276
    /**
     * Returns the instruction list this finder was constructed with.
     *
     * @return the inquired instruction list
     */
    public final InstructionList getInstructionList() {
        return il;
    }
283
    /**
     * Code patterns found may be checked with an additional user-defined
     * constraint object to decide whether they really match the needed
     * criterion, i.e., to check constraints that cannot be expressed with
     * regular expressions.
     *
     */
    public interface CodeConstraint {

        /**
         * Decides whether a candidate match should be reported.
         *
         * @param match
         *          array of instructions matching the requested pattern
         * @return true if the matched area is really useful
         */
        boolean checkCode( InstructionHandle[] match );
    }
299
300    // Initialize pattern map
301    static {
302        map.put("arithmeticinstruction","(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)");
303        map.put("invokeinstruction", "(invokevirtual|invokeinterface|invokestatic|invokespecial|invokedynamic)");
304        map.put("arrayinstruction", "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)");
305        map.put("gotoinstruction", "(goto|goto_w)");
306        map.put("conversioninstruction", "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)");
307        map.put("localvariableinstruction","(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)");
308        map.put("loadinstruction", "(fload|dload|lload|iload|aload)");
309        map.put("fieldinstruction", "(getfield|putstatic|getstatic|putfield)");
310        map.put("cpinstruction", "(ldc2_w|invokeinterface|invokedynamic|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)");
311        map.put("stackinstruction", "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)");
312        map.put("branchinstruction", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
313        map.put("returninstruction", "(lreturn|ireturn|freturn|dreturn|areturn|return)");
314        map.put("storeinstruction", "(istore|fstore|dstore|astore|lstore)");
315        map.put("select", "(tableswitch|lookupswitch)");
316        map.put("ifinstruction", "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)");
317        map.put("jsrinstruction", "(jsr|jsr_w)");
318        map.put("variablelengthinstruction", "(tableswitch|jsr|goto|lookupswitch)");
319        map.put("unconditionalbranch", "(goto|jsr|jsr_w|athrow|goto_w)");
320        map.put("constantpushinstruction", "(dconst|bipush|sipush|fconst|iconst|lconst)");
321        map.put("typedinstruction", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)");
322        map.put("popinstruction", "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)");
323        map.put("allocationinstruction", "(multianewarray|new|anewarray|newarray)");
324        map.put("indexedinstruction", "(lload|lstore|fload|ldc2_w|invokeinterface|invokedynamic|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)");
325        map.put("pushinstruction", "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)");
326        map.put("stackproducer", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|invokedynamic|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)");
327        map.put("stackconsumer", "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|invokedynamic|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)");
328        map.put("exceptionthrower","(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|invokedynamic|ldc|invokestatic|daload)");
329        map.put("loadclass", "(multianewarray|invokeinterface|invokedynamic|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)");
330        map.put("instructiontargeter", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
331        // Some aliases
332        map.put("if_icmp", "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)");
333        map.put("if_acmp", "(if_acmpeq|if_acmpne)");
334        map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)");
335        // Precompile some aliases first
336        map.put("iconst", precompile(Const.ICONST_0, Const.ICONST_5, Const.ICONST_M1));
337        map.put("lconst", new String(new char[] { '(', makeChar(Const.LCONST_0), '|', makeChar(Const.LCONST_1), ')' }));
338        map.put("dconst", new String(new char[] { '(', makeChar(Const.DCONST_0), '|', makeChar(Const.DCONST_1), ')' }));
339        map.put("fconst", new String(new char[] { '(', makeChar(Const.FCONST_0), '|', makeChar(Const.FCONST_1), '|', makeChar(Const.FCONST_2), ')' }));
340        map.put("lload", precompile(Const.LLOAD_0, Const.LLOAD_3, Const.LLOAD));
341        map.put("iload", precompile(Const.ILOAD_0, Const.ILOAD_3, Const.ILOAD));
342        map.put("dload", precompile(Const.DLOAD_0, Const.DLOAD_3, Const.DLOAD));
343        map.put("fload", precompile(Const.FLOAD_0, Const.FLOAD_3, Const.FLOAD));
344        map.put("aload", precompile(Const.ALOAD_0, Const.ALOAD_3, Const.ALOAD));
345        map.put("lstore", precompile(Const.LSTORE_0, Const.LSTORE_3, Const.LSTORE));
346        map.put("istore", precompile(Const.ISTORE_0, Const.ISTORE_3, Const.ISTORE));
347        map.put("dstore", precompile(Const.DSTORE_0, Const.DSTORE_3, Const.DSTORE));
348        map.put("fstore", precompile(Const.FSTORE_0, Const.FSTORE_3, Const.FSTORE));
349        map.put("astore", precompile(Const.ASTORE_0, Const.ASTORE_3, Const.ASTORE));
350        // Compile strings
351        for (final Map.Entry<String, String> entry : map.entrySet()) {
352            final String key = entry.getKey();
353            final String value = entry.getValue();
354            final char ch = value.charAt(1); // Omit already precompiled patterns
355            if (ch < OFFSET) {
356                map.put(key, compilePattern(value)); // precompile all patterns
357            }
358        }
359        // Add instruction alias to match anything
360        final StringBuilder buf = new StringBuilder("(");
361        for (short i = 0; i < NO_OPCODES; i++) {
362            if (Const.getNoOfOperands(i) != Const.UNDEFINED) { // Not an invalid opcode
363                buf.append(makeChar(i));
364                if (i < NO_OPCODES - 1) {
365                    buf.append('|');
366                }
367            }
368        }
369        buf.append(')');
370        map.put("instruction", buf.toString());
371    }
372
373
374    private static String precompile( final short from, final short to, final short extra ) {
375        final StringBuilder buf = new StringBuilder("(");
376        for (short i = from; i <= to; i++) {
377            buf.append(makeChar(i));
378            buf.append('|');
379        }
380        buf.append(makeChar(extra));
381        buf.append(")");
382        return buf.toString();
383    }
384
385
386    /*
387     * Internal debugging routines.
388     */
389//    private static final String pattern2string( String pattern ) {
390//        return pattern2string(pattern, true);
391//    }
392
393
394//    private static final String pattern2string( String pattern, boolean make_string ) {
395//        StringBuffer buf = new StringBuffer();
396//        for (int i = 0; i < pattern.length(); i++) {
397//            char ch = pattern.charAt(i);
398//            if (ch >= OFFSET) {
399//                if (make_string) {
400//                    buf.append(Constants.getOpcodeName(ch - OFFSET));
401//                } else {
402//                    buf.append((ch - OFFSET));
403//                }
404//            } else {
405//                buf.append(ch);
406//            }
407//        }
408//        return buf.toString();
409//    }
410}
411