/**
 * Computes the string edit distance.
 *
 * <p>
 * Refer to a computer science text book for the definition
 * of the "string edit distance". Each insertion, deletion or
 * substitution of a single character costs 1.
 *
 * <p>
 * The static entry points may be called from several threads: every
 * access to the shared result cache is guarded by the cache's monitor.
 *
 * @author
 *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
 */
public class EditDistance {

    /**
     * Weak results cache to avoid additional computations.
     * Because of high complexity caching is required.
     *
     * <p>
     * {@link WeakHashMap} is not synchronized, so this map must only be
     * touched while holding its own monitor. Keys are held weakly, so a
     * cached value may disappear at any time; a miss is simply recomputed.
     */
    private static final WeakHashMap<AbstractMap.SimpleEntry<String,String>, Integer> CACHE = new WeakHashMap<AbstractMap.SimpleEntry<String, String>, Integer>();

    /**
     * Computes the edit distance between two strings.
     *
     * <p>
     * The complexity is O(nm) where n=a.length() and m=b.length().
     * Results are cached, so repeated calls with an equal pair of
     * strings may be answered without recomputation.
     *
     * @param a
     *      the first string.
     * @param b
     *      the second string.
     * @return
     *      the number of single-character edits needed to turn one
     *      string into the other.
     */
    public static int editDistance( String a, String b ) {
        // SimpleEntry serves as a ready-made PAIR type with value-based equals/hashCode
        AbstractMap.SimpleEntry<String,String> entry = new AbstractMap.SimpleEntry<String, String>(a, b);

        // single lookup; a null result means "not cached (any more)"
        Integer result;
        synchronized (CACHE) {
            result = CACHE.get(entry);
        }

        if (result == null) {
            // compute outside the lock so that concurrent callers are not
            // serialized on the O(nm) work; a duplicated computation by a
            // racing thread yields the same value and is harmless
            result = new EditDistance(a, b).calc();
            synchronized (CACHE) {
                CACHE.put(entry, result);
            }
        }
        return result;
    }

    /**
     * Finds the string in the <code>group</code> closest to
     * <code>key</code> and returns it.
     *
     * @param key
     *      the string to compare against.
     * @param group
     *      the candidate strings.
     * @return null if group.length==0.
     */
    public static String findNearest( String key, String[] group ) {
        return findNearest(key, Arrays.asList(group));
    }

    /**
     * Finds the string in the <code>group</code> closest to
     * <code>key</code> and returns it.
     *
     * <p>
     * When several candidates are equally close, the one encountered
     * first in the iteration order of <code>group</code> wins.
     *
     * @param key
     *      the string to compare against.
     * @param group
     *      the candidate strings.
     * @return null if group is empty.
     */
    public static String findNearest( String key, Collection<String> group ) {
        int best = Integer.MAX_VALUE;
        String nearest = null;

        for (String candidate : group) {
            int ed = editDistance(key, candidate);
            // strict comparison keeps the earliest candidate on ties
            if (ed < best) {
                best = ed;
                nearest = candidate;
            }
        }
        return nearest;
    }

    /** cost vector: the row of the distance table currently being filled. */
    private int[] cost;
    /** back buffer: the previously completed row of the distance table. */
    private int[] back;

    /** Two strings to be compared. */
    private final String a,b;

    /**
     * Prepares the two row buffers; the initial cost row holds the
     * distance from each prefix of <code>a</code> to the empty string.
     */
    private EditDistance( String a, String b ) {
        this.a=a;
        this.b=b;
        cost = new int[a.length()+1];
        back = new int[a.length()+1]; // back buffer

        for( int i=0; i<=a.length(); i++ ) {
            cost[i] = i;
        }
    }

    /**
     * Swaps two buffers.
     */
    private void flip() {
        int[] t = cost;
        cost = back;
        back = t;
    }

    /** Returns the smallest of the three values. */
    private static int min(int a,int b,int c) {
        return Math.min(a,Math.min(b,c));
    }

    /**
     * Fills the distance table one row per character of <code>b</code>,
     * keeping only the current and the previous row.
     *
     * @return
     *      the edit distance between <code>a</code> and <code>b</code>.
     */
    private int calc() {
        final int aLen = a.length();
        final int bLen = b.length();

        for( int j=0; j<bLen; j++ ) {
            // the row finished in the previous round becomes the back buffer
            flip();
            // distance from the empty prefix of a to the first j+1 chars of b
            cost[0] = j+1;
            final char bc = b.charAt(j);
            for( int i=0; i<aLen; i++ ) {
                int match = (a.charAt(i)==bc)?0:1;
                // substitution/match, step along a, step along b
                cost[i+1] = min( back[i]+match, cost[i]+1, back[i+1]+1 );
            }
        }
        return cost[aLen];
    }
}