InstructionFinder.java revision 628:2bfaf29cc90b
1/*
2 * reserved comment block
3 * DO NOT REMOVE OR ALTER!
4 */
5package com.sun.org.apache.bcel.internal.util;
6
7/* ====================================================================
8 * The Apache Software License, Version 1.1
9 *
10 * Copyright (c) 2001 The Apache Software Foundation.  All rights
11 * reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 *
17 * 1. Redistributions of source code must retain the above copyright
18 *    notice, this list of conditions and the following disclaimer.
19 *
20 * 2. Redistributions in binary form must reproduce the above copyright
21 *    notice, this list of conditions and the following disclaimer in
22 *    the documentation and/or other materials provided with the
23 *    distribution.
24 *
25 * 3. The end-user documentation included with the redistribution,
26 *    if any, must include the following acknowledgment:
27 *       "This product includes software developed by the
28 *        Apache Software Foundation (http://www.apache.org/)."
29 *    Alternately, this acknowledgment may appear in the software itself,
30 *    if and wherever such third-party acknowledgments normally appear.
31 *
32 * 4. The names "Apache" and "Apache Software Foundation" and
33 *    "Apache BCEL" must not be used to endorse or promote products
34 *    derived from this software without prior written permission. For
35 *    written permission, please contact apache@apache.org.
36 *
37 * 5. Products derived from this software may not be called "Apache",
38 *    "Apache BCEL", nor may "Apache" appear in their name, without
39 *    prior written permission of the Apache Software Foundation.
40 *
41 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
42 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
43 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
44 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
45 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
46 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
47 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
48 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
49 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
50 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
51 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
53 * ====================================================================
54 *
55 * This software consists of voluntary contributions made by many
56 * individuals on behalf of the Apache Software Foundation.  For more
57 * information on the Apache Software Foundation, please see
58 * <http://www.apache.org/>.
59 */
60
61import java.util.*;
62import com.sun.org.apache.bcel.internal.Constants;
63import com.sun.org.apache.bcel.internal.generic.*;
64import com.sun.org.apache.regexp.internal.*;
65
/**
 * InstructionFinder is a tool to search for given instruction patterns,
 * i.e., match sequences of instructions in an instruction list via
 * regular expressions. This can be used, e.g., in order to implement
 * a peephole optimizer that looks for code patterns and replaces
 * them with faster equivalents.
 *
 * <p>This class internally uses the <a href="http://jakarta.apache.org/regexp/">
 * Regexp</a> package to search for regular expressions.
 *
 * <p>A typical application would look like this:
 * <pre>
    InstructionFinder f   = new InstructionFinder(il);
    String            pat = "IfInstruction ICONST_0 GOTO ICONST_1 NOP (IFEQ|IFNE)";

    for(Iterator i = f.search(pat, constraint); i.hasNext(); ) {
      InstructionHandle[] match = (InstructionHandle[])i.next();
      ...
      il.delete(match[1], match[5]);
      ...
    }
 * </pre>
 * @author  <A HREF="http://www.berlin.de/~markus.dahm/">M. Dahm</A>
 * @see Instruction
 * @see InstructionList
 */
92public class InstructionFinder {
  // Added to an opcode by makeChar() to obtain the character that stands for the
  // instruction in il_string and in compiled patterns.
  private static final int OFFSET     = 32767; // char + OFFSET is outside of LATIN-1
  // Upper bound used when iterating over all opcode numbers.
  private static final int NO_OPCODES = 256;   // Potential number, some are not used

  // Lower-case symbolic name (instruction class or alias) -> pattern string.
  // Filled and precompiled by the static initializer, read by mapName().
  private static final HashMap map = new HashMap(); // Map<String,String>

  private InstructionList     il;           // the list this finder searches
  private String              il_string;    // instruction list as string, one char per instruction
  private InstructionHandle[] handles;      // handles[i] is the instruction encoded at il_string[i]
101
  /**
   * Creates a finder for the given instruction list and immediately takes a
   * snapshot of its current contents via {@link #reread()}.
   *
   * @param il instruction list to search for given patterns
   */
  public InstructionFinder(InstructionList il) {
    this.il = il;
    reread(); // builds il_string and handles from the list as it is right now
  }
109
110  /**
111   * Reread the instruction list, e.g., after you've altered the list upon a match.
112   */
113  public final void reread() {
114    int    size  = il.getLength();
115    char[] buf   = new char[size]; // Create a string with length equal to il length
116    handles      = il.getInstructionHandles();
117
118    // Map opcodes to characters
119    for(int i=0; i < size; i++)
120      buf[i] = makeChar(handles[i].getInstruction().getOpcode());
121
122    il_string = new String(buf);
123  }
124
125  /**
126   * Map symbolic instruction names like "getfield" to a single character.
127   *
128   * @param pattern instruction pattern in lower case
129   * @return encoded string for a pattern such as "BranchInstruction".
130   */
131  private static final String mapName(String pattern) {
132    String result = (String)map.get(pattern);
133
134    if(result != null)
135      return result;
136
137    for(short i=0; i < NO_OPCODES; i++)
138      if(pattern.equals(Constants.OPCODE_NAMES[i]))
139        return "" + makeChar(i);
140
141    throw new RuntimeException("Instruction unknown: " + pattern);
142  }
143
144  /**
145   * Replace symbolic names of instructions with the appropiate character and remove
146   * all white space from string. Meta characters such as +, * are ignored.
147   *
148   * @param pattern The pattern to compile
149   * @return translated regular expression string
150   */
151  private static final String compilePattern(String pattern) {
152    String       lower      = pattern.toLowerCase();
153    StringBuffer buf        = new StringBuffer();
154    int          size       = pattern.length();
155
156    for(int i=0; i < size; i++) {
157      char ch = lower.charAt(i);
158
159      if(Character.isLetterOrDigit(ch)) {
160        StringBuffer name = new StringBuffer();
161
162        while((Character.isLetterOrDigit(ch) || ch == '_') && i < size) {
163          name.append(ch);
164
165          if(++i < size)
166            ch = lower.charAt(i);
167          else
168            break;
169        }
170
171        i--;
172
173        buf.append(mapName(name.toString()));
174      } else if(!Character.isWhitespace(ch))
175        buf.append(ch);
176    }
177
178    return buf.toString();
179  }
180
181  /**
182   * @return the matched piece of code as an array of instruction (handles)
183   */
184  private InstructionHandle[] getMatch(int matched_from, int match_length) {
185    InstructionHandle[] match = new InstructionHandle[match_length];
186    System.arraycopy(handles, matched_from, match, 0, match_length);
187
188    return match;
189  }
190
191  /**
192   * Search for the given pattern in the instruction list. You can search for any valid
193   * opcode via its symbolic name, e.g. "istore". You can also use a super class or
194   * an interface name to match a whole set of instructions, e.g. "BranchInstruction" or
195   * "LoadInstruction". "istore" is also an alias for all "istore_x" instructions. Additional
196   * aliases are "if" for "ifxx", "if_icmp" for "if_icmpxx", "if_acmp" for "if_acmpxx".
197   *
198   * Consecutive instruction names must be separated by white space which will be removed
199   * during the compilation of the pattern.
200   *
201   * For the rest the usual pattern matching rules for regular expressions apply.<P>
202   * Example pattern:
203   * <pre>
204     search("BranchInstruction NOP ((IfInstruction|GOTO)+ ISTORE Instruction)*");
205   * </pre>
206   *
207   * <p>If you alter the instruction list upon a match such that other
208   * matching areas are affected, you should call reread() to update
209   * the finder and call search() again, because the matches are cached.
210   *
211   * @param pattern the instruction pattern to search for, where case is ignored
212   * @param from where to start the search in the instruction list
213   * @param constraint optional CodeConstraint to check the found code pattern for
214   * user-defined constraints
215   * @return iterator of matches where e.nextElement() returns an array of instruction handles
216   * describing the matched area
217   */
218  public final Iterator search(String pattern, InstructionHandle from,
219                               CodeConstraint constraint)
220  {
221    String search = compilePattern(pattern);
222    int  start    = -1;
223
224    for(int i=0; i < handles.length; i++) {
225      if(handles[i] == from) {
226        start = i; // Where to start search from (index)
227        break;
228      }
229    }
230
231    if(start == -1)
232      throw new ClassGenException("Instruction handle " + from +
233                                  " not found in instruction list.");
234    try {
235      RE regex = new RE(search);
236      ArrayList matches = new ArrayList();
237
238      while(start < il_string.length() && regex.match(il_string, start)) {
239        int startExpr = regex.getParenStart(0);
240        int endExpr   = regex.getParenEnd(0);
241        int lenExpr   = regex.getParenLength(0);
242
243        InstructionHandle[] match = getMatch(startExpr, lenExpr);
244
245        if((constraint == null) || constraint.checkCode(match))
246          matches.add(match);
247        start = endExpr;
248      }
249
250      return matches.iterator();
251    } catch(RESyntaxException e) {
252      System.err.println(e);
253    }
254
255    return null;
256  }
257
258  /**
259   * Start search beginning from the start of the given instruction list.
260   *
261   * @param pattern the instruction pattern to search for, where case is ignored
262   * @return iterator of matches where e.nextElement()
263   * returns an array of instruction handles describing the matched
264   * area
265   */
266  public final Iterator search(String pattern) {
267    return search(pattern, il.getStart(), null);
268  }
269
270  /**
271   * Start search beginning from `from'.
272   *
273   * @param pattern the instruction pattern to search for, where case is ignored
274   * @param from where to start the search in the instruction list
275   * @return  iterator of matches where e.nextElement() returns an array of instruction handles
276   * describing the matched area
277   */
278  public final Iterator search(String pattern, InstructionHandle from) {
279    return search(pattern, from, null);
280  }
281
282  /**
283   * Start search beginning from the start of the given instruction list.
284   * Check found matches with the constraint object.
285   *
286   * @param pattern the instruction pattern to search for, case is ignored
287   * @param constraint constraints to be checked on matching code
288   * @return instruction handle or `null' if the match failed
289   */
290  public final Iterator search(String pattern, CodeConstraint constraint) {
291    return search(pattern, il.getStart(), constraint);
292  }
293
294  /**
295   * Convert opcode number to char.
296   */
297  private static final char makeChar(short opcode) {
298    return (char)(opcode + OFFSET);
299  }
300
301  /**
302   * @return the inquired instruction list
303   */
304  public final InstructionList getInstructionList() { return il; }
305
306  /**
307   * Code patterns found may be checked using an additional
308   * user-defined constraint object whether they really match the needed criterion.
309   * I.e., check constraints that can not expressed with regular expressions.
310   *
311   */
312  public interface CodeConstraint {
313    /**
314     * @param match array of instructions matching the requested pattern
315     * @return true if the matched area is really useful
316     */
317    public boolean checkCode(InstructionHandle[] match);
318  }
319
320  // Initialize pattern map
321
322  static {
323    map.put("arithmeticinstruction", "(irem|lrem|iand|ior|ineg|isub|lneg|fneg|fmul|ldiv|fadd|lxor|frem|idiv|land|ixor|ishr|fsub|lshl|fdiv|iadd|lor|dmul|lsub|ishl|imul|lmul|lushr|dneg|iushr|lshr|ddiv|drem|dadd|ladd|dsub)");
324    map.put("invokeinstruction", "(invokevirtual|invokeinterface|invokestatic|invokespecial)");
325    map.put("arrayinstruction", "(baload|aastore|saload|caload|fastore|lastore|iaload|castore|iastore|aaload|bastore|sastore|faload|laload|daload|dastore)");
326    map.put("gotoinstruction", "(goto|goto_w)");
327    map.put("conversioninstruction", "(d2l|l2d|i2s|d2i|l2i|i2b|l2f|d2f|f2i|i2d|i2l|f2d|i2c|f2l|i2f)");
328    map.put("localvariableinstruction", "(fstore|iinc|lload|dstore|dload|iload|aload|astore|istore|fload|lstore)");
329    map.put("loadinstruction", "(fload|dload|lload|iload|aload)");
330    map.put("fieldinstruction", "(getfield|putstatic|getstatic|putfield)");
331    map.put("cpinstruction", "(ldc2_w|invokeinterface|multianewarray|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|ldc_w|invokestatic|invokevirtual|putfield|ldc|new|anewarray)");
332    map.put("stackinstruction", "(dup2|swap|dup2_x2|pop|pop2|dup|dup2_x1|dup_x2|dup_x1)");
333    map.put("branchinstruction", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
334    map.put("returninstruction", "(lreturn|ireturn|freturn|dreturn|areturn|return)");
335    map.put("storeinstruction", "(istore|fstore|dstore|astore|lstore)");
336    map.put("select", "(tableswitch|lookupswitch)");
337    map.put("ifinstruction", "(ifeq|ifgt|if_icmpne|if_icmpeq|ifge|ifnull|ifne|if_icmple|if_icmpge|if_acmpeq|if_icmplt|if_acmpne|ifnonnull|iflt|if_icmpgt|ifle)");
338    map.put("jsrinstruction", "(jsr|jsr_w)");
339    map.put("variablelengthinstruction", "(tableswitch|jsr|goto|lookupswitch)");
340    map.put("unconditionalbranch", "(goto|jsr|jsr_w|athrow|goto_w)");
341    map.put("constantpushinstruction", "(dconst|bipush|sipush|fconst|iconst|lconst)");
342    map.put("typedinstruction", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dastore|ret|f2d|f2i|drem|iinc|i2c|checkcast|frem|lreturn|astore|lushr|daload|dneg|fastore|istore|lshl|ldiv|lstore|areturn|ishr|ldc_w|invokeinterface|aastore|lxor|ishl|l2d|i2f|return|faload|sipush|iushr|caload|instanceof|invokespecial|putfield|fmul|ireturn|laload|d2f|lneg|ixor|i2l|fdiv|lastore|multianewarray|i2b|getstatic|i2d|putstatic|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|freturn|ldc|aconst_null|castore|lmul|ldc2_w|dadd|iconst|f2l|ddiv|dstore|land|jsr|anewarray|dmul|bipush|dsub|sastore|d2i|i2s|lshr|iadd|l2i|lload|bastore|fstore|fneg|iload|fadd|baload|fconst|ior|ineg|dreturn|l2f|lconst|getfield|invokevirtual|invokestatic|iastore)");
343    map.put("popinstruction", "(fstore|dstore|pop|pop2|astore|putstatic|istore|lstore)");
344    map.put("allocationinstruction", "(multianewarray|new|anewarray|newarray)");
345    map.put("indexedinstruction", "(lload|lstore|fload|ldc2_w|invokeinterface|multianewarray|astore|dload|putstatic|instanceof|getstatic|checkcast|getfield|invokespecial|dstore|istore|iinc|ldc_w|ret|fstore|invokestatic|iload|putfield|invokevirtual|ldc|new|aload|anewarray)");
346    map.put("pushinstruction", "(dup|lload|dup2|bipush|fload|ldc2_w|sipush|lconst|fconst|dload|getstatic|ldc_w|aconst_null|dconst|iload|ldc|iconst|aload)");
347    map.put("stackproducer", "(imul|lsub|aload|fload|lor|new|aaload|fcmpg|iand|iaload|lrem|idiv|d2l|isub|dcmpg|dup|f2d|f2i|drem|i2c|checkcast|frem|lushr|daload|dneg|lshl|ldiv|ishr|ldc_w|invokeinterface|lxor|ishl|l2d|i2f|faload|sipush|iushr|caload|instanceof|invokespecial|fmul|laload|d2f|lneg|ixor|i2l|fdiv|getstatic|i2b|swap|i2d|dup2|fcmpl|saload|ladd|irem|dload|jsr_w|dconst|dcmpl|fsub|ldc|arraylength|aconst_null|tableswitch|lmul|ldc2_w|iconst|dadd|f2l|ddiv|land|jsr|anewarray|dmul|bipush|dsub|d2i|newarray|i2s|lshr|iadd|lload|l2i|fneg|iload|fadd|baload|fconst|lookupswitch|ior|ineg|lconst|l2f|getfield|invokevirtual|invokestatic)");
348    map.put("stackconsumer", "(imul|lsub|lor|iflt|fcmpg|if_icmpgt|iand|ifeq|if_icmplt|lrem|ifnonnull|idiv|d2l|isub|dcmpg|dastore|if_icmpeq|f2d|f2i|drem|i2c|checkcast|frem|lreturn|astore|lushr|pop2|monitorexit|dneg|fastore|istore|lshl|ldiv|lstore|areturn|if_icmpge|ishr|monitorenter|invokeinterface|aastore|lxor|ishl|l2d|i2f|return|iushr|instanceof|invokespecial|fmul|ireturn|d2f|lneg|ixor|pop|i2l|ifnull|fdiv|lastore|i2b|if_acmpeq|ifge|swap|i2d|putstatic|fcmpl|ladd|irem|dcmpl|fsub|freturn|ifgt|castore|lmul|dadd|f2l|ddiv|dstore|land|if_icmpne|if_acmpne|dmul|dsub|sastore|ifle|d2i|i2s|lshr|iadd|l2i|bastore|fstore|fneg|fadd|ior|ineg|ifne|dreturn|l2f|if_icmple|getfield|invokevirtual|invokestatic|iastore)");
349    map.put("exceptionthrower", "(irem|lrem|laload|putstatic|baload|dastore|areturn|getstatic|ldiv|anewarray|iastore|castore|idiv|saload|lastore|fastore|putfield|lreturn|caload|getfield|return|aastore|freturn|newarray|instanceof|multianewarray|athrow|faload|iaload|aaload|dreturn|monitorenter|checkcast|bastore|arraylength|new|invokevirtual|sastore|ldc_w|ireturn|invokespecial|monitorexit|invokeinterface|ldc|invokestatic|daload)");
350    map.put("loadclass", "(multianewarray|invokeinterface|instanceof|invokespecial|putfield|checkcast|putstatic|invokevirtual|new|getstatic|invokestatic|getfield|anewarray)");
351    map.put("instructiontargeter", "(ifle|if_acmpne|if_icmpeq|if_acmpeq|ifnonnull|goto_w|iflt|ifnull|if_icmpne|tableswitch|if_icmple|ifeq|if_icmplt|jsr_w|if_icmpgt|ifgt|jsr|goto|ifne|ifge|lookupswitch|if_icmpge)");
352
353    // Some aliases
354    map.put("if_icmp", "(if_icmpne|if_icmpeq|if_icmple|if_icmpge|if_icmplt|if_icmpgt)");
355    map.put("if_acmp", "(if_acmpeq|if_acmpne)");
356    map.put("if", "(ifeq|ifne|iflt|ifge|ifgt|ifle)");
357
358    // Precompile some aliases first
359    map.put("iconst", precompile(Constants.ICONST_0, Constants.ICONST_5, Constants.ICONST_M1));
360    map.put("lconst", new String(new char[] { '(', makeChar(Constants.LCONST_0), '|',
361                                              makeChar(Constants.LCONST_1), ')' }));
362    map.put("dconst", new String(new char[] { '(', makeChar(Constants.DCONST_0), '|',
363                                              makeChar(Constants.DCONST_1), ')' }));
364    map.put("fconst", new String(new char[] { '(', makeChar(Constants.FCONST_0), '|',
365                                              makeChar(Constants.FCONST_1), ')' }));
366
367    map.put("iload", precompile(Constants.ILOAD_0, Constants.ILOAD_3, Constants.ILOAD));
368    map.put("dload", precompile(Constants.DLOAD_0, Constants.DLOAD_3, Constants.DLOAD));
369    map.put("fload", precompile(Constants.FLOAD_0, Constants.FLOAD_3, Constants.FLOAD));
370    map.put("aload", precompile(Constants.ALOAD_0, Constants.ALOAD_3, Constants.ALOAD));
371
372    map.put("istore", precompile(Constants.ISTORE_0, Constants.ISTORE_3, Constants.ISTORE));
373    map.put("dstore", precompile(Constants.DSTORE_0, Constants.DSTORE_3, Constants.DSTORE));
374    map.put("fstore", precompile(Constants.FSTORE_0, Constants.FSTORE_3, Constants.FSTORE));
375    map.put("astore", precompile(Constants.ASTORE_0, Constants.ASTORE_3, Constants.ASTORE));
376
377    // Compile strings
378
379    for(Iterator i = map.keySet().iterator(); i.hasNext(); ) {
380      String key   = (String)i.next();
381      String value = (String)map.get(key);
382
383      char ch = value.charAt(1); // Omit already precompiled patterns
384      if(ch < OFFSET) {
385        map.put(key, compilePattern(value)); // precompile all patterns
386      }
387    }
388
389    // Add instruction alias to match anything
390
391    StringBuffer buf = new StringBuffer("(");
392
393    for(short i=0; i < NO_OPCODES; i++) {
394      if(Constants.NO_OF_OPERANDS[i] != Constants.UNDEFINED) { // Not an invalid opcode
395        buf.append(makeChar(i));
396
397        if(i < NO_OPCODES - 1)
398          buf.append('|');
399      }
400    }
401    buf.append(')');
402
403    map.put("instruction", buf.toString());
404  }
405
406  private static String precompile(short from, short to, short extra) {
407    StringBuffer buf = new StringBuffer("(");
408
409    for(short i=from; i <= to; i++) {
410      buf.append(makeChar(i));
411      buf.append('|');
412    }
413
414    buf.append(makeChar(extra));
415    buf.append(")");
416    return buf.toString();
417  }
418
419  /*
420   * Internal debugging routines.
421   */
422  private static final String pattern2string(String pattern) {
423    return pattern2string(pattern, true);
424  }
425
426  private static final String pattern2string(String pattern, boolean make_string) {
427    StringBuffer buf = new StringBuffer();
428
429    for(int i=0; i < pattern.length(); i++) {
430      char ch = pattern.charAt(i);
431
432      if(ch >= OFFSET) {
433        if(make_string)
434          buf.append(Constants.OPCODE_NAMES[ch - OFFSET]);
435        else
436          buf.append((int)(ch - OFFSET));
437      } else
438        buf.append(ch);
439    }
440
441    return buf.toString();
442  }
443}
444